[PATCH 2/2] iommu/amd: Remove performance counter pre-initialization test

2021-04-09 Thread Suravee Suthikulpanit
On early AMD desktop/mobile platforms (circa 2013), when IOMMU
Performance Counter (PMC) support was first introduced in
commit 30861ddc9cca ("perf/x86/amd: Add IOMMU Performance Counter
resource management"), there was a HW bug where the counters could not
be accessed. As a result, reading a counter always returned zero.

At the time, the suggested workaround was to add test logic prior
to initializing the PMC feature, to check whether the counters can be
programmed and read back with the same value. This worked fine until
more recent desktop/mobile platforms started enabling power gating for
the PMC, which prevents access to the counters. This results in PMC
support being disabled unnecessarily.

Unfortunately, there is no documentation of which hardware generation
fixed the original PMC HW bug, although it was fixed soon after the
PMC's first introduction. Based on this, we assume that the buggy
platforms are unlikely to still be in use, and it should be relatively
safe to remove this legacy logic.

Link: https://lore.kernel.org/linux-iommu/alpine.lnx.3.20.13.2006030935570.3...@monopod.intra.ispras.ru/
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753
Cc: Tj (Elloe Linux) 
Cc: Shuah Khan 
Cc: Alexander Monakov 
Cc: David Coe 
Cc: Paul Menzel 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c | 24 +---
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 648cdfd03074..247cdda5d683 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1714,33 +1714,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
 }
 
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
-   u8 fxn, u64 *value, bool is_write);
-
 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 {
+   u64 val;
struct pci_dev *pdev = iommu->dev;
-   u64 val = 0xabcd, val2 = 0, save_reg = 0;
 
if (!iommu_feature(iommu, FEATURE_PC))
return;
 
amd_iommu_pc_present = true;
 
-   /* save the value to restore, if writable */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
-   goto pc_false;
-
-   /* Check if the performance counters can be written to */
-   if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
-   (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
-   (val != val2))
-   goto pc_false;
-
-   /* restore */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
-   goto pc_false;
-
pci_info(pdev, "IOMMU performance counters supported\n");
 
val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
@@ -1748,11 +1731,6 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
iommu->max_counters = (u8) ((val >> 7) & 0xf);
 
return;
-
-pc_false:
-   pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
-   amd_iommu_pc_present = false;
-   return;
 }
 
 static ssize_t amd_iommu_show_cap(struct device *dev,
-- 
2.17.1
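For reference, the probe being removed boils down to the following
write/read-back check (condensed from the deleted hunk above;
iommu_pc_get_set_reg() is the existing helper that reads or writes a
single PMC register):

	u64 probe = 0xabcd, readback = 0;

	/* Write a known pattern to bank 0 / counter 0 / function 0 ... */
	if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &probe, true) ||
	    /* ... read it back ... */
	    iommu_pc_get_set_reg(iommu, 0, 0, 0, &readback, false) ||
	    /* ... and require an exact match. */
	    probe != readback)
		goto pc_false;

On the 2013-era parts with the original HW bug the read-back is zero, so
the compare fails and PMC support is correctly disabled. On newer parts
with PMC power gating the gated counter fails the same compare, which is
exactly the false negative this patch eliminates.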



[PATCH 1/2] Revert "iommu/amd: Fix performance counter initialization"

2021-04-09 Thread Suravee Suthikulpanit
From: Paul Menzel 

This reverts commit 6778ff5b21bd8e78c8bd547fd66437cf2657fd9b.

The original commit attempted to address an issue where PMC power gating
caused the IOMMU PMC pre-init test to fail on certain desktop/mobile
platforms where power gating is normally enabled.

There have been several reports that the workaround still is not
guaranteed to work, and it can add up to 100 ms (in the worst case:
five retries with a 20 ms sleep each) to the boot process on certain
platforms such as the MSI B350M MORTAR with AMD Ryzen 3 2200G.

Therefore, revert this commit as a prelude to removing the pre-init
test.

Link: https://lore.kernel.org/linux-iommu/alpine.lnx.3.20.13.2006030935570.3...@monopod.intra.ispras.ru/
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753
Cc: Tj (Elloe Linux) 
Cc: Shuah Khan 
Cc: Alexander Monakov 
Cc: David Coe 
Signed-off-by: Paul Menzel 
Signed-off-by: Suravee Suthikulpanit 
---
Note: I have revised the commit message to add more detail
  and remove unnecessary information.

 drivers/iommu/amd/init.c | 45 ++--
 1 file changed, 11 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 321f5906e6ed..648cdfd03074 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,7 +12,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -257,8 +256,6 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
 static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
-   u8 fxn, u64 *value, bool is_write);
 
 static bool amd_iommu_pre_enabled = true;
 
@@ -1717,11 +1714,13 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
 }
 
-static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+   u8 fxn, u64 *value, bool is_write);
+
+static void init_iommu_perf_ctr(struct amd_iommu *iommu)
 {
-   int retry;
struct pci_dev *pdev = iommu->dev;
-   u64 val = 0xabcd, val2 = 0, save_reg, save_src;
+   u64 val = 0xabcd, val2 = 0, save_reg = 0;
 
if (!iommu_feature(iommu, FEATURE_PC))
return;
@@ -1729,39 +1728,17 @@ static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
 
/* save the value to restore, if writable */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) ||
-   iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false))
-   goto pc_false;
-
-   /*
-* Disable power gating by programing the performance counter
-* source to 20 (i.e. counts the reads and writes from/to IOMMU
-* Reserved Register [MMIO Offset 1FF8h] that are ignored.),
-* which never get incremented during this init phase.
-* (Note: The event is also deprecated.)
-*/
-   val = 20;
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true))
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
goto pc_false;
 
/* Check if the performance counters can be written to */
-   val = 0xabcd;
-   for (retry = 5; retry; retry--) {
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) ||
-   iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) ||
-   val2)
-   break;
-
-   /* Wait about 20 msec for power gating to disable and retry. */
-   msleep(20);
-   }
-
-   /* restore */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) ||
-   iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true))
+   if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
+   (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
+   (val != val2))
goto pc_false;
 
-   if (val != val2)
+   /* restore */
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
goto pc_false;
 
pci_info(pdev, "IOMMU performance counters supported\n");
-- 
2.17.1



[PATCH 0/2] iommu/amd: Revert and remove failing PMC test

2021-04-09 Thread Suravee Suthikulpanit
The failing PMC pre-initialization test has prevented the PMC from
working on more recent desktop/mobile platforms, where PMC power gating
is normally enabled. After consulting with HW designers and the IOMMU
maintainer, we have decided to remove the legacy test altogether to
avoid future PMC enabling issues.

Thanks to the community for helping to test, investigate, provide data,
and report issues on several platforms in the field.

Regards,
Suravee 

Paul Menzel (1):
  Revert "iommu/amd: Fix performance counter initialization"

Suravee Suthikulpanit (1):
  iommu/amd: Remove performance counter pre-initialization test

 drivers/iommu/amd/init.c | 49 ++--
 1 file changed, 2 insertions(+), 47 deletions(-)

-- 
2.17.1



Re: [RFC PATCH 5/7] iommu/amd: Add support for Guest IO protection

2021-03-25 Thread Suravee Suthikulpanit

Joerg,

On 3/18/21 10:31 PM, Joerg Roedel wrote:

On Fri, Mar 12, 2021 at 03:04:09AM -0600, Suravee Suthikulpanit wrote:

@@ -519,6 +521,7 @@ struct protection_domain {
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int glx;/* Number of levels for GCR3 table */
+   bool giov;  /* guest IO protection domain */


Could this be turned into a flag?



Good point. I'll convert it to use protection_domain.flags.

Thanks,
Suravee


Re: [RFC PATCH 6/7] iommu/amd: Introduce amd_iommu_pgtable command-line option

2021-03-21 Thread Suravee Suthikulpanit

Joerg,

On 3/18/21 10:33 PM, Joerg Roedel wrote:

On Fri, Mar 12, 2021 at 03:04:10AM -0600, Suravee Suthikulpanit wrote:

Allow specifying whether to use the v1 or v2 IOMMU page table for
DMA remapping when using the kernel DMA-API.

Signed-off-by: Suravee Suthikulpanit 
---
  Documentation/admin-guide/kernel-parameters.txt |  6 ++
  drivers/iommu/amd/init.c| 15 +++
  2 files changed, 21 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..466e807369ea 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -319,6 +319,12 @@
 This mode requires kvm-amd.avic=1.
 (Default when IOMMU HW support is present.)
  
+	amd_iommu_pgtable= [HW,X86-64]
+   Specifies one of the following AMD IOMMU page tables to
+   be used for DMA remapping for the DMA-API:
+   v1 - Use v1 page table (Default)
+   v2 - Use v2 page table


Any reason v2 can not be the default when it is supported by the IOMMU?



Eventually, we should be able to default to v2. However, we will need
to make sure that the v2 implementation has performance comparable to
the currently used v1.

FYI: I'm also looking into adding support for SVA as well.

Thanks,
Suravee


[RFC PATCH 7/7] iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API

2021-03-12 Thread Suravee Suthikulpanit
Introduce an init function for setting up the DMA domain for the
DMA-API with the IOMMU v2 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/iommu.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index e29ece6e1e68..bd26de8764bd 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1937,6 +1937,24 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
return 0;
 }
 
+static int protection_domain_init_v2(struct protection_domain *domain)
+{
+   spin_lock_init(&domain->lock);
+   domain->id = domain_id_alloc();
+   if (!domain->id)
+   return -ENOMEM;
+   INIT_LIST_HEAD(&domain->dev_list);
+
+   domain->giov = true;
+
+   if (amd_iommu_pgtable == AMD_IOMMU_V2 &&
+   domain_enable_v2(domain, 1, false)) {
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
 static struct protection_domain *protection_domain_alloc(unsigned int type)
 {
struct io_pgtable_ops *pgtbl_ops;
@@ -1964,6 +1982,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
case AMD_IOMMU_V1:
ret = protection_domain_init_v1(domain, mode);
break;
+   case AMD_IOMMU_V2:
+   ret = protection_domain_init_v2(domain);
+   break;
default:
ret = -EINVAL;
}
-- 
2.17.1



[RFC PATCH 6/7] iommu/amd: Introduce amd_iommu_pgtable command-line option

2021-03-12 Thread Suravee Suthikulpanit
Allow specifying whether to use the v1 or v2 IOMMU page table for
DMA remapping when using the kernel DMA-API.

Signed-off-by: Suravee Suthikulpanit 
---
 Documentation/admin-guide/kernel-parameters.txt |  6 ++
 drivers/iommu/amd/init.c| 15 +++
 2 files changed, 21 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..466e807369ea 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -319,6 +319,12 @@
 This mode requires kvm-amd.avic=1.
 (Default when IOMMU HW support is present.)
 
+   amd_iommu_pgtable= [HW,X86-64]
+   Specifies one of the following AMD IOMMU page tables to
+   be used for DMA remapping for the DMA-API:
+   v1 - Use v1 page table (Default)
+   v2 - Use v2 page table
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: ,
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 9265c1bf1d84..6d5163bfb87e 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3123,6 +3123,20 @@ static int __init parse_amd_iommu_dump(char *str)
return 1;
 }
 
+static int __init parse_amd_iommu_pgtable(char *str)
+{
+   for (; *str; ++str) {
+   if (strncmp(str, "v1", 2) == 0) {
+   amd_iommu_pgtable = AMD_IOMMU_V1;
+   break;
+   } else if (strncmp(str, "v2", 2) == 0) {
+   amd_iommu_pgtable = AMD_IOMMU_V2;
+   break;
+   }
+   }
+   return 1;
+}
+
 static int __init parse_amd_iommu_intr(char *str)
 {
for (; *str; ++str) {
@@ -3246,6 +3260,7 @@ static int __init parse_ivrs_acpihid(char *str)
 
 __setup("amd_iommu_dump",  parse_amd_iommu_dump);
 __setup("amd_iommu=",  parse_amd_iommu_options);
+__setup("amd_iommu_pgtable=",  parse_amd_iommu_pgtable);
 __setup("amd_iommu_intr=", parse_amd_iommu_intr);
 __setup("ivrs_ioapic", parse_ivrs_ioapic);
 __setup("ivrs_hpet",   parse_ivrs_hpet);
-- 
2.17.1
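For example, selecting the v2 page table would look like this on the
kernel command line (illustrative; v1 remains the default per the
documentation hunk above):

	amd_iommu_pgtable=v2

Note that parse_amd_iommu_pgtable() scans the option string with
strncmp(), so the first "v1" or "v2" found in the value wins.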



[RFC PATCH 3/7] iommu/amd: Decouple the logic to enable PPR and GT

2021-03-12 Thread Suravee Suthikulpanit
Currently, the function to enable IOMMU v2 (GT) assumes the PPR log
must also be enabled. This is no longer the case, since the IOMMU
v2 page table can be enabled without PPR support (for the DMA-API
use case).

Therefore, separate the enabling logic for PPR and GT.
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c | 19 +--
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 9126efcbaf2c..5def566de6f6 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -898,14 +898,6 @@ static void iommu_enable_xt(struct amd_iommu *iommu)
 #endif /* CONFIG_IRQ_REMAP */
 }
 
-static void iommu_enable_gt(struct amd_iommu *iommu)
-{
-   if (!iommu_feature(iommu, FEATURE_GT))
-   return;
-
-   iommu_feature_enable(iommu, CONTROL_GT_EN);
-}
-
 /* sets a specific bit in the device table entry. */
 static void set_dev_entry_bit(u16 devid, u8 bit)
 {
@@ -1882,6 +1874,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_max_glx_val = glxval;
else
		amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
+   iommu_feature_enable(iommu, CONTROL_GT_EN);
}
 
if (iommu_feature(iommu, FEATURE_GT) &&
@@ -2530,21 +2523,19 @@ static void early_enable_iommus(void)
 #endif
 }
 
-static void enable_iommus_v2(void)
+static void enable_iommus_ppr(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
+   for_each_iommu(iommu)
iommu_enable_ppr_log(iommu);
-   iommu_enable_gt(iommu);
-   }
 }
 
 static void enable_iommus(void)
 {
early_enable_iommus();
 
-   enable_iommus_v2();
+   enable_iommus_ppr();
 }
 
 static void disable_iommus(void)
@@ -2935,7 +2926,7 @@ static int __init state_next(void)
register_syscore_ops(&amd_iommu_syscore_ops);
ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
-   enable_iommus_v2();
+   enable_iommus_ppr();
break;
case IOMMU_PCI_INIT:
ret = amd_iommu_enable_interrupts();
-- 
2.17.1



[RFC PATCH 5/7] iommu/amd: Add support for Guest IO protection

2021-03-12 Thread Suravee Suthikulpanit
The AMD IOMMU introduces support for Guest I/O protection, where
requests from an I/O device without a PASID are treated as if they
have PASID 0.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h | 3 +++
 drivers/iommu/amd/init.c| 8 
 drivers/iommu/amd/iommu.c   | 4 
 3 files changed, 15 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 25062eb86c8b..876ba1adf73e 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -93,6 +93,7 @@
 #define FEATURE_HE (1ULL<<8)
 #define FEATURE_PC (1ULL<<9)
 #define FEATURE_GAM_VAPIC  (1ULL<<21)
+#define FEATURE_GIOSUP (1ULL<<48)
 #define FEATURE_EPHSUP (1ULL<<50)
 #define FEATURE_SNP(1ULL<<63)
 
@@ -366,6 +367,7 @@
 #define DTE_FLAG_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_GIOV  (1ULL << 54)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
@@ -519,6 +521,7 @@ struct protection_domain {
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int glx;/* Number of levels for GCR3 table */
+   bool giov;  /* guest IO protection domain */
u64 *gcr3_tbl;  /* Guest CR3 table */
unsigned long flags;/* flags to find out type of domain */
unsigned dev_cnt;   /* devices assigned to this domain */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 5def566de6f6..9265c1bf1d84 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1895,6 +1895,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
 
init_iommu_perf_ctr(iommu);
 
+   if (amd_iommu_pgtable == AMD_IOMMU_V2 &&
+   !iommu_feature(iommu, FEATURE_GIOSUP)) {
+   pr_warn("Cannot enable v2 page table for DMA-API. Fallback to 
v1.\n");
+   amd_iommu_pgtable = AMD_IOMMU_V1;
+   }
+
if (is_rd890_iommu(iommu->dev)) {
int i, j;
 
@@ -1969,6 +1975,8 @@ static void print_iommu_info(void)
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
pr_info("X2APIC enabled\n");
}
+   if (amd_iommu_pgtable == AMD_IOMMU_V2)
+   pr_info("GIOV enabled\n");
 }
 
 static int __init amd_iommu_init_pci(void)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f3800efdbb29..e29ece6e1e68 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1405,6 +1405,10 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
 
pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
+
+   if (domain->giov && (domain->flags & PD_IOMMUV2_MASK))
+   pte_root |= DTE_FLAG_GIOV;
+
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
-- 
2.17.1



[RFC PATCH 4/7] iommu/amd: Initial support for AMD IOMMU v2 page table

2021-03-12 Thread Suravee Suthikulpanit
Introduce IO page table framework support for AMD IOMMU v2 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/Makefile  |   2 +-
 drivers/iommu/amd/amd_iommu_types.h |   2 +
 drivers/iommu/amd/io_pgtable_v2.c   | 239 
 drivers/iommu/io-pgtable.c  |   1 +
 include/linux/io-pgtable.h  |   2 +
 5 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/amd/io_pgtable_v2.c

diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index a935f8f4b974..773d8aa00283 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 6937e3674a16..25062eb86c8b 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -265,6 +265,7 @@
  * 512GB Pages are not supported due to a hardware bug
  */
 #define AMD_IOMMU_PGSIZES  ((~0xFFFUL) & ~(2ULL << 38))
+#define AMD_IOMMU_PGSIZES_V2   (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
 
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
@@ -503,6 +504,7 @@ struct amd_io_pgtable {
int mode;
u64 *root;
atomic64_t  pt_root;/* pgtable root and pgtable mode */
+   struct mm_structv2_mm;
 };
 
 /*
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
new file mode 100644
index ..b0b6ba2d8d35
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table v2 allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit 
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static pte_t *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long iova,
+ unsigned long *page_size)
+{
+   int level;
+   pte_t *ptep;
+
+   ptep = lookup_address_in_mm(&pgtable->v2_mm, iova, &level);
+   if (!ptep || pte_none(*ptep) || (level == PG_LEVEL_NONE))
+   return NULL;
+
+   *page_size = PTE_LEVEL_PAGE_SIZE(level-1);
+   return ptep;
+}
+
+static pte_t *v2_pte_alloc_map(struct mm_struct *mm, unsigned long vaddr)
+{
+   pgd_t *pgd;
+   p4d_t *p4d;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+
+   pgd = pgd_offset(mm, vaddr);
+   p4d = p4d_alloc(mm, pgd, vaddr);
+   if (!p4d)
+   return NULL;
+   pud = pud_alloc(mm, p4d, vaddr);
+   if (!pud)
+   return NULL;
+   pmd = pmd_alloc(mm, pud, vaddr);
+   if (!pmd)
+   return NULL;
+   pte = pte_alloc_map(mm, pmd, vaddr);
+   return pte;
+}
+
+static int iommu_v2_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+   struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+   pte_t *pte;
+   int ret, i, count;
+   bool updated = false;
+   unsigned long o_iova = iova;
+   unsigned long pte_pgsize;
+
+   BUG_ON(!IS_ALIGNED(iova, size) || !IS_ALIGNED(paddr, size));
+
+   ret = -EINVAL;
+   if (!(prot & IOMMU_PROT_MASK))
+   goto out;
+
+   count = PAGE_SIZE_PTE_COUNT(size);
+
+   for (i = 0; i < count; ++i, iova += PAGE_SIZE, paddr += PAGE_SIZE) {
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
+   if (!pte || pte_none(*pte)) {
+   pte = v2_pte_alloc_map(&dom->iop.v2_mm, iova);
+   if (!pte)
+   goto out;
+   } else {
+   updated = true;
+   }
+   set_pte(pte, __pte((paddr & PAGE_MASK)|_PAGE_PRESENT|_PAGE_USER));
+   if (prot & IOMMU_PROT_IW)
+   *pte = pte_mkwrite(*pte);
+   }
+
+   if (updated) {
+   if (count > 1)
+   amd_iommu_flush_tlb(&dom->domain, 0);
+   else
+   amd_iommu_flush_page(&dom->domain, 0, o_iova);
+   }

[RFC PATCH 2/7] iommu/amd: Update sanity check when enable PRI/ATS

2021-03-12 Thread Suravee Suthikulpanit
Currently, PPR/ATS can be enabled only if the domain type is
identity mapping. However, when we allow the IOMMU v2 page table
to be used for the DMA-API, the sanity check needs to be updated to
apply only when using the AMD_IOMMU_V1 page table mode.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/iommu.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 6f3e42495709..f3800efdbb29 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1549,7 +1549,7 @@ static int pri_reset_while_enabled(struct pci_dev *pdev)
return 0;
 }
 
-static int pdev_iommuv2_enable(struct pci_dev *pdev)
+static int pdev_pri_ats_enable(struct pci_dev *pdev)
 {
bool reset_enable;
int reqs, ret;
@@ -1624,11 +1624,19 @@ static int attach_device(struct device *dev,
struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
 
ret = -EINVAL;
-   if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
+
+   /*
+* In case of using AMD_IOMMU_V1 page table mode, and the device
+* is enabling for PPR/ATS support (using v2 table),
+* we need to make sure that the domain type is identity map.
+*/
+   if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+   def_domain->type != IOMMU_DOMAIN_IDENTITY) {
goto out;
+   }
 
if (dev_data->iommu_v2) {
-   if (pdev_iommuv2_enable(pdev) != 0)
+   if (pdev_pri_ats_enable(pdev) != 0)
goto out;
 
dev_data->ats.enabled = true;
-- 
2.17.1



[RFC PATCH 1/7] iommu/amd: Refactor amd_iommu_domain_enable_v2

2021-03-12 Thread Suravee Suthikulpanit
The current function to enable IOMMU v2 also locks the domain.
In order to reuse the same code in a different code path, in which
the domain has already been locked, refactor the function to separate
the locking from the enabling logic.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/iommu.c | 42 +--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a69a8b573e40..6f3e42495709 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -88,6 +88,7 @@ struct iommu_cmd {
 struct kmem_cache *amd_iommu_irq_cache;
 
 static void detach_device(struct device *dev);
+static int domain_enable_v2(struct protection_domain *domain, int pasids, bool has_ppr);
 
 /
  *
@@ -2304,10 +2305,9 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 }
 EXPORT_SYMBOL(amd_iommu_domain_direct_map);
 
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
+static int domain_enable_v2(struct protection_domain *domain, int pasids, bool has_ppr)
 {
-   struct protection_domain *domain = to_pdomain(dom);
-   unsigned long flags;
int levels, ret;
 
if (pasids <= 0 || pasids > (PASID_MASK + 1))
@@ -2320,17 +2320,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
 
-   spin_lock_irqsave(&domain->lock, flags);
-
-   /*
-* Save us all sanity checks whether devices already in the
-* domain support IOMMUv2. Just force that the domain has no
-* devices attached when it is switched into IOMMUv2 mode.
-*/
-   ret = -EBUSY;
-   if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
-   goto out;
-
ret = -ENOMEM;
domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
if (domain->gcr3_tbl == NULL)
@@ -2344,8 +2333,31 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
ret = 0;
 
 out:
-   spin_unlock_irqrestore(&domain->lock, flags);
+   return ret;
+}
 
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+{
+   int ret;
+   unsigned long flags;
+   struct protection_domain *pdom = to_pdomain(dom);
+
+   spin_lock_irqsave(&pdom->lock, flags);
+
+   /*
+* Save us all sanity checks whether devices already in the
+* domain support IOMMUv2. Just force that the domain has no
+* devices attached when it is switched into IOMMUv2 mode.
+*/
+   ret = -EBUSY;
+   if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK)
+   goto out;
+
+   if (pdom->dev_cnt == 0 && !(pdom->gcr3_tbl))
+   ret = domain_enable_v2(pdom, pasids, true);
+
+out:
+   spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
 }
 EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
-- 
2.17.1



[RFC PATCH 0/7] iommu/amd: Add Generic IO Page Table Framework Support for v2 Page Table

2021-03-12 Thread Suravee Suthikulpanit
This series introduces a new usage model for the v2 page table, where it
can be used to implement support for the DMA-API by adopting the generic
IO page table framework.

One of the target use cases is to support nested IO page tables,
where the guest uses the guest IO page table (v2) for translating
GVA to GPA, and the hypervisor uses the host I/O page table (v1) for
translating GPA to SPA. This is a prerequisite for supporting the new
HW-assisted vIOMMU presented at KVM Forum 2020.

  
https://static.sched.com/hosted_files/kvmforum2020/26/vIOMMU%20KVM%20Forum%202020.pdf

The following components are introduced in this series:

- Part 1 (patch 1-4 and 7)
  Refactor the current IOMMU page table v2 code
  to adopt the generic IO page table framework, and add
  AMD IOMMU Guest (v2) page table management code.

- Part 2 (patch 5)
  Add support for the AMD IOMMU Guest IO Protection feature (GIOV)
  where requests from the I/O device without a PASID are treated as
  if they have PASID of 0.

- Part 3 (patch 6)
  Introduce the new amd_iommu_pgtable command-line option to allow
  users to select the mode of operation (v1 or v2).

See AMD I/O Virtualization Technology Specification for more detail.

  http://www.amd.com/system/files/TechDocs/48882_IOMMU_3.05_PUB.pdf

Thanks,
Suravee

Suravee Suthikulpanit (7):
  iommu/amd: Refactor amd_iommu_domain_enable_v2
  iommu/amd: Update sanity check when enable PRI/ATS
  iommu/amd: Decouple the logic to enable PPR and GT
  iommu/amd: Initial support for AMD IOMMU v2 page table
  iommu/amd: Add support for Guest IO protection
  iommu/amd: Introduce amd_iommu_pgtable command-line option
  iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API

 .../admin-guide/kernel-parameters.txt |   6 +
 drivers/iommu/amd/Makefile|   2 +-
 drivers/iommu/amd/amd_iommu_types.h   |   5 +
 drivers/iommu/amd/init.c  |  42 ++-
 drivers/iommu/amd/io_pgtable_v2.c | 239 ++
 drivers/iommu/amd/iommu.c |  81 --
 drivers/iommu/io-pgtable.c|   1 +
 include/linux/io-pgtable.h|   2 +
 8 files changed, 345 insertions(+), 33 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable_v2.c

-- 
2.17.1
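As a rough picture, the nested configuration described in the cover
letter above chains the two table formats per DMA transaction
(illustrative only):

	device DMA (GVA) --[guest v2 table]--> GPA --[host v1 table]--> SPA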



Re: [PATCH] iommu/amd: Fix event counter availability check

2021-02-22 Thread Suravee Suthikulpanit

This fix has recently been accepted upstream.

https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/commit/?h=x86/amd

Could you please give this a try?

Thanks,
Suravee

On 2/21/21 8:49 PM, Paul Menzel wrote:

Dear Suravee,


On 17.09.20 at 19:55, Alexander Monakov wrote:

On Tue, 16 Jun 2020, Suravee Suthikulpanit wrote:


Instead of blindly moving the code around to a spot that would just work,
I am trying to understand what might be required here. In this case,
the init_device_table_dma() should not be needed. I suspect it's the IOMMU
invalidate-all command that's also needed here.

I'm also checking with the HW and BIOS team. Meanwhile, could you please
give the following change a try:

Hello. Can you give any update please?


[…]


Sorry for the late reply. I have a reproducer and am working with the
HW team to understand the issue. I should be able to provide an update
with a solution by the end of this week.


Hello, hope you are doing well. Has this investigation found anything?


I am wondering the same. It’d be great to have this fixed in the upstream Linux kernel.


Kind regards,

Paul


[PATCH] iommu/amd: Fix performance counter initialization

2021-02-08 Thread Suravee Suthikulpanit
Certain AMD platforms enable a power-gating feature for the IOMMU PMC,
which prevents the IOMMU driver from updating the counter while
trying to validate the PMC functionality in init_iommu_perf_ctr().
This results in PMC support being disabled, with the following error
message:

"AMD-Vi: Unable to read/write to IOMMU perf counter"

To work around this issue, disable power gating temporarily by
programming the counter source to a non-zero value while validating
the counter, and restore the prior state afterward.

Tested-by: Tj (Elloe Linux) 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c | 45 ++--
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 83d8ab2aed9f..01da76dc1caa 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -254,6 +255,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
 static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+   u8 fxn, u64 *value, bool is_write);
 
 static bool amd_iommu_pre_enabled = true;
 
@@ -1712,13 +1715,11 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
 }
 
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
-   u8 fxn, u64 *value, bool is_write);
-
-static void init_iommu_perf_ctr(struct amd_iommu *iommu)
+static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
 {
+   int retry;
struct pci_dev *pdev = iommu->dev;
-   u64 val = 0xabcd, val2 = 0, save_reg = 0;
+   u64 val = 0xabcd, val2 = 0, save_reg, save_src;
 
if (!iommu_feature(iommu, FEATURE_PC))
return;
@@ -1726,17 +1727,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
 
/* save the value to restore, if writable */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) ||
+   iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false))
goto pc_false;
 
-   /* Check if the performance counters can be written to */
-   if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
-   (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
-   (val != val2))
+   /*
+* Disable power gating by programing the performance counter
+* source to 20 (i.e. counts the reads and writes from/to IOMMU
+* Reserved Register [MMIO Offset 1FF8h] that are ignored.),
+* which never get incremented during this init phase.
+* (Note: The event is also deprecated.)
+*/
+   val = 20;
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true))
goto pc_false;
 
+   /* Check if the performance counters can be written to */
+   val = 0xabcd;
+   for (retry = 5; retry; retry--) {
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) ||
+   iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) ||
+   val2)
+   break;
+
+   /* Wait about 20 msec for power gating to disable and retry. */
+   msleep(20);
+   }
+
/* restore */
-   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
+   if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) ||
+   iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true))
+   goto pc_false;
+
+   if (val != val2)
goto pc_false;
 
pci_info(pdev, "IOMMU performance counters supported\n");
-- 
2.17.1
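As a quick sanity check on the latency this workaround can add: the
retry loop above sleeps 20 ms per iteration for up to five iterations,
i.e.

	5 retries * 20 ms = 100 ms (worst case, per IOMMU)

which matches the "up to 100 ms" boot-time cost cited in the later
revert of this commit.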



Re: [PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support

2021-01-27 Thread Suravee Suthikulpanit




On 1/27/21 7:06 PM, Joerg Roedel wrote:

Hi Suravee,

On Tue, Dec 15, 2020 at 01:36:52AM -0600, Suravee Suthikulpanit wrote:
  

Suravee Suthikulpanit (13):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table


Applied this series, thanks for the work! Given testing goes well you
can consider this queued for 5.12.

Thanks,

Joerg



Thanks Joerg and Will, and welcome back!!!

Suravee


Re: [PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support

2021-01-04 Thread Suravee Suthikulpanit

Hi Joerg / Will,

Happy New Year!! Just want to follow up on this series.

Thanks,
Suravee

On 12/15/20 2:36 PM, Suravee Suthikulpanit wrote:

The framework allows a callable implementation of the IO page table.
This allows the AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of the AMD IOMMU v1
page table to adopt the framework. There should be no functional change.
A subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V3 (https://lore.kernel.org/linux-iommu/20201004014549.16065-1-suravee.suthikulpa...@amd.com/)
   - Rebase to v5.10
   - Patch  2: Add struct iommu_flush_ops (previously in patch 13 of v3)
   - Patch  7: Consolidate logic into v1_free_pgtable() instead of amd_iommu_free_pgtable()
   - Patch 12: Check ops->[map|unmap] before calling.
   - Patch 13: Setup page table when allocating domain (instead of when attaching device).

Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
   - Patch  2: Introduce helper function io_pgtable_cfg_to_data.
   - Patch 13: Put back the struct iommu_flush_ops since patch v2 would run into
     NULL pointer bug when calling free_io_pgtable_ops if not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
   - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
 not currently used. (per Robin)
   - Remove unused struct iommu_flush_ops.  (patch 2/13)
   - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
     (patch 13/13)

Suravee Suthikulpanit (13):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table

  drivers/iommu/amd/Kconfig   |   1 +
  drivers/iommu/amd/Makefile  |   2 +-
  drivers/iommu/amd/amd_iommu.h   |  22 +
  drivers/iommu/amd/amd_iommu_types.h |  43 +-
  drivers/iommu/amd/init.c|   2 +
  drivers/iommu/amd/io_pgtable.c  | 564 +++
  drivers/iommu/amd/iommu.c   | 672 
  drivers/iommu/io-pgtable.c  |   3 +
  include/linux/io-pgtable.h  |   2 +
  9 files changed, 707 insertions(+), 604 deletions(-)
  create mode 100644 drivers/iommu/amd/io_pgtable.c



[PATCH v4 12/13] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page

2020-12-14 Thread Suravee Suthikulpanit
These implement map and unmap for the AMD IOMMU v1 page table, which
will be used by the IO page table framework.

Also clean up unused extern function declarations.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  | 13 -
 drivers/iommu/amd/io_pgtable.c | 25 -
 drivers/iommu/amd/iommu.c  | 13 -
 3 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 83ca822c5349..3770b1a4d51c 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -133,19 +133,6 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
-/* TODO: These are temporary and will be removed once fully transition */
-extern int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp);
-extern unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size);
-extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index a293b69b38b9..d91964e98d58 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-  unsigned long address,
-  unsigned long *page_size)
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
 {
int level;
u64 *pte;
@@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-int iommu_map_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long paddr,
-  unsigned long size,
-  int prot,
-  gfp_t gfp)
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
+   struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
struct page *freelist = NULL;
bool updated = false;
u64 __pte, *pte;
@@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom,
return ret;
 }
 
-unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long size)
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
 {
-   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
@@ -554,6 +551,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
cfg->oas= IOMMU_OUT_ADDR_BIT_SIZE,
cfg->tlb= &v1_flush_ops;
 
+   pgtable->iop.ops.map  = iommu_v1_map_page;
+   pgtable->iop.ops.unmap= iommu_v1_unmap_page;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
 
return &pgtable->iop;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 29b7fefc8485..1f04b251f0c6 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2066,8 +2066,9 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 gfp_t gfp)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
-   int ret;
+   int ret = -EINVAL;
 
if (domain->iop.mode == PAGE_MODE_NONE)
return -EINVAL;
@@ -2077,9 +2078,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
 
-
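The hunk is cut off above; presumably amd_iommu_map() now dispatches
through the newly installed callbacks, along these lines (a sketch, not
the verbatim remainder of the patch):

	if (ops->map)
		ret = ops->map(ops, iova, paddr, size, prot, gfp);

The v4 changelog note "Patch 12: Check ops->[map|unmap] before calling"
refers to exactly this kind of guard.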

[PATCH v4 11/13] iommu/amd: Introduce iommu_v1_iova_to_phys

2020-12-14 Thread Suravee Suthikulpanit
This implements iova_to_phys for the AMD IOMMU v1 page table,
which will be used by the IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 22 ++
 drivers/iommu/amd/iommu.c  | 16 +---
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 87184b6cee0f..a293b69b38b9 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
return unmapped;
 }
 
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+   unsigned long offset_mask, pte_pgsize;
+   u64 *pte, __pte;
+
+   if (pgtable->mode == PAGE_MODE_NONE)
+   return iova;
+
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+   if (!pte || !IOMMU_PTE_PRESENT(*pte))
+   return 0;
+
+   offset_mask = pte_pgsize - 1;
+   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
+
+   return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
 /*
  * 
  */
@@ -534,6 +554,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
cfg->oas= IOMMU_OUT_ADDR_BIT_SIZE,
cfg->tlb= &v1_flush_ops;
 
+   pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+
return &pgtable->iop;
 }
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 76f61dd6b89f..29b7fefc8485 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2101,22 +2101,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 {
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
-   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-   unsigned long offset_mask, pte_pgsize;
-   u64 *pte, __pte;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
-   return iova;
-
-   pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
-   if (!pte || !IOMMU_PTE_PRESENT(*pte))
-   return 0;
-
-   offset_mask = pte_pgsize - 1;
-   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
-
-   return (__pte & ~offset_mask) | (iova & offset_mask);
+   return ops->iova_to_phys(ops, iova);
 }
 
 static bool amd_iommu_capable(enum iommu_cap cap)
-- 
2.17.1
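A worked example of the mask arithmetic in iommu_v1_iova_to_phys()
above, assuming fetch_pte() found a 2 MiB mapping:

	pte_pgsize  = 0x200000;                /* 2 MiB */
	offset_mask = pte_pgsize - 1;          /* 0x1fffff */
	/* frame bits from the PTE, page-offset bits from the IOVA: */
	phys = (__pte & ~offset_mask) | (iova & offset_mask);

So for iova = 0x12345678 and a PTE frame of 0x80000000, the result is
0x80145678.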



[PATCH v4 04/13] iommu/amd: Convert to using amd_io_pgtable

2020-12-14 Thread Suravee Suthikulpanit
Make use of the new struct amd_io_pgtable in preparation to remove
the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  1 +
 drivers/iommu/amd/iommu.c | 25 ++---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index b8dae3941f0f..bf9723b35e77 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -56,6 +56,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 5b93536d6877..fdb6030b505d 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -89,8 +89,6 @@ struct kmem_cache *amd_iommu_irq_cache;
 
 static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
 
 /
  *
@@ -1502,7 +1500,7 @@ static bool increase_address_space(struct protection_domain *domain,
 
pgtable.root  = pte;
pgtable.mode += 1;
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
domain_flush_complete(domain);
 
/*
@@ -1877,17 +1875,16 @@ static void free_gcr3_table(struct protection_domain *domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
  bool ats, bool ppr)
 {
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
 
-   if (pgtable->mode != PAGE_MODE_NONE)
-   pte_root = iommu_virt_to_phys(pgtable->root);
+   if (domain->iop.mode != PAGE_MODE_NONE)
+   pte_root = iommu_virt_to_phys(domain->iop.root);
 
-   pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+   pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1977,7 +1974,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   set_dte_entry(dev_data->devid, domain, &pgtable,
+   set_dte_entry(dev_data->devid, domain,
  ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
 
@@ -2284,22 +2281,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
  *
  */
 
-static void update_device_table(struct protection_domain *domain,
-   struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
 {
struct iommu_dev_data *dev_data;
 
list_for_each_entry(dev_data, &domain->dev_list, list) {
-   set_dte_entry(dev_data->devid, domain, pgtable,
+   set_dte_entry(dev_data->devid, domain,
  dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
 }
 
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
 {
-   update_device_table(domain, pgtable);
+   update_device_table(domain);
domain_flush_devices(domain);
 }
 
@@ -2309,7 +2304,7 @@ static void update_domain(struct protection_domain *domain)
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
-- 
2.17.1



[PATCH v4 02/13] iommu/amd: Prepare for generic IO page table framework

2020-12-14 Thread Suravee Suthikulpanit
Add initial hook up code to implement generic IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/Kconfig   |  1 +
 drivers/iommu/amd/Makefile  |  2 +-
 drivers/iommu/amd/amd_iommu_types.h | 35 ++
 drivers/iommu/amd/io_pgtable.c  | 75 +
 drivers/iommu/amd/iommu.c   | 10 
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 +
 7 files changed, 117 insertions(+), 11 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+   select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
  With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 494b42a31b7a..5d77f34e0fda 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
 
 #define GA_GUEST_NR0x1
 
+#define IOMMU_IN_ADDR_BIT_SIZE  52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES  ((~0xFFFUL) & ~(2ULL << 38))
+
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
@@ -465,6 +479,26 @@ struct amd_irte_ops;
 
 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
 
+#define io_pgtable_to_data(x) \
+   container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+   io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+   container_of(io_pgtable_ops_to_data(x), \
+struct protection_domain, iop)
+
+#define io_pgtable_cfg_to_data(x) \
+   container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+
+struct amd_io_pgtable {
+   struct io_pgtable_cfg   pgtbl_cfg;
+   struct io_pgtable   iop;
+   int mode;
+   u64 *root;
+};
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -473,6 +507,7 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
   iommu core code */
+   struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
atomic64_t pt_root; /* pgtable root and pgtable mode */
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index ..aedf2c932c40
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit 
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_flush_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+unsigned long iova, size_t granule,
+   
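The message is truncated above. For readers following the container_of
chain in the macros added by this patch, recovering the owning domain
from an ops pointer works like this (a sketch using only the
definitions introduced above):

	/* ops lives at pgtable->iop.ops, and that pgtable is embedded in
	 * the protection domain as domain->iop, so each step is one
	 * container_of():
	 */
	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(ops);
	struct amd_io_pgtable *data = io_pgtable_to_data(iop);
	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);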

[PATCH v4 10/13] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable

2020-12-14 Thread Suravee Suthikulpanit
Simplify the fetch_pte function by passing struct amd_io_pgtable
directly. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 13 +++--
 drivers/iommu/amd/iommu.c  |  4 +++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 76276d9e463c..83ca822c5349 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -143,7 +143,7 @@ extern int iommu_map_page(struct protection_domain *dom,
 extern unsigned long iommu_unmap_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long page_size);
-extern u64 *fetch_pte(struct protection_domain *domain,
+extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
  unsigned long address,
  unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 35dd9153e6b7..87184b6cee0f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct protection_domain *domain,
+u64 *fetch_pte(struct amd_io_pgtable *pgtable,
   unsigned long address,
   unsigned long *page_size)
 {
@@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain,
 
*page_size = 0;
 
-   if (address > PM_LEVEL_SIZE(domain->iop.mode))
+   if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
 
-   level  =  domain->iop.mode - 1;
-   pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+   level  =  pgtable->mode - 1;
+   pte= &pgtable->root[PM_LEVEL_INDEX(level, address)];
*page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
while (level > 0) {
@@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
   unsigned long iova,
   unsigned long size)
 {
+   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
@@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
unmapped = 0;
 
while (unmapped < size) {
-   pte = fetch_pte(dom, iova, &unmap_size);
-
+   pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
int i, count;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2963a37b7c16..76f61dd6b89f 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2100,13 +2100,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
  dma_addr_t iova)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
 
if (domain->iop.mode == PAGE_MODE_NONE)
return iova;
 
-   pte = fetch_pte(domain, iova, &pte_pgsize);
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
-- 
2.17.1



[PATCH v4 08/13] iommu/amd: Remove amd_iommu_domain_get_pgtable

2020-12-14 Thread Suravee Suthikulpanit
Since the IO page table root and mode parameters have been moved into
struct amd_io_pgtable, the function is no longer needed. Therefore,
remove it along with the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   |  4 ++--
 drivers/iommu/amd/amd_iommu_types.h |  6 -
 drivers/iommu/amd/io_pgtable.c  | 36 ++---
 drivers/iommu/amd/iommu.c   | 34 ---
 4 files changed, 19 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 91d098003f12..76276d9e463c 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -110,6 +110,8 @@ static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
atomic64_set(&domain->iop.pt_root, root);
+   domain->iop.root = (u64 *)(root & PAGE_MASK);
+   domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
 static inline
@@ -144,8 +146,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom,
 extern u64 *fetch_pte(struct protection_domain *domain,
  unsigned long address,
  unsigned long *page_size);
-extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
-struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 #endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 7c971c76d685..6897567d307e 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -518,12 +518,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
-/* For decocded pt_root */
-struct domain_pgtable {
-   int mode;
-   u64 *root;
-};
-
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index dc674e79ddf0..d4d131e43dcd 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -184,30 +184,27 @@ static bool increase_address_space(struct protection_domain *domain,
   unsigned long address,
   gfp_t gfp)
 {
-   struct domain_pgtable pgtable;
unsigned long flags;
bool ret = true;
u64 *pte;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   if (address <= PM_LEVEL_SIZE(pgtable.mode))
+   if (address <= PM_LEVEL_SIZE(domain->iop.mode))
goto out;
 
ret = false;
-   if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
+   if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
 
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
 
-   *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+   *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
 
-   pgtable.root  = pte;
-   pgtable.mode += 1;
+   domain->iop.root  = pte;
+   domain->iop.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
amd_iommu_domain_flush_complete(domain);
 
@@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain *domain,
 * Device Table needs to be updated and flushed before the new root can
 * be published.
 */
-   amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
+   amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
ret = true;
 
@@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain,
  gfp_t gfp,
  bool *updated)
 {
-   struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
 
BUG_ON(!is_power_of_2(page_size));
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+   while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
/*
 * Return an error if there is no memory to update the
 * page-table.
 */
if (!increase_address_space(domain, address, gfp))
return NULL;
-
-   /* Read new values to check if update was successful */
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
}
 
 
-   level   = pgtable.mode - 1;
-   pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
+   level   = domain->iop.mode - 1;
+   pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
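
The decode added to amd_iommu_domain_set_pt_root() above is the crux of
this patch: root and mode travel together in a single 64-bit value. A
small illustrative sketch of the packing scheme (mirroring the hunk, not
new API):

	/* Sketch: pt_root packs a page-aligned root pointer with the
	 * 3-bit paging mode in the low bits, so readers can fetch both
	 * with one atomic64_read().
	 */
	u64 pt_root = (unsigned long)root | (mode & 7);
	u64 *decoded_root = (u64 *)(pt_root & PAGE_MASK);
	int decoded_mode  = pt_root & 7;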

[PATCH v4 06/13] iommu/amd: Move IO page table related functions

2020-12-14 Thread Suravee Suthikulpanit
Prepare for migration to the IO page table framework.
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  18 ++
 drivers/iommu/amd/io_pgtable.c | 473 
 drivers/iommu/amd/iommu.c  | 476 +
 3 files changed, 493 insertions(+), 474 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index bf29ab8c99f0..1bad42a3c73c 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -131,4 +131,22 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
+/* TODO: These are temporary and will be removed once fully transition */
+extern void free_pagetable(struct domain_pgtable *pgtable);
+extern int iommu_map_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long phys_addr,
+ unsigned long page_size,
+ int prot,
+ gfp_t gfp);
+extern unsigned long iommu_unmap_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long page_size);
+extern u64 *fetch_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long *page_size);
+extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+struct domain_pgtable *pgtable);
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+u64 *root, int mode);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index aedf2c932c40..345e9bc81fde 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -50,6 +50,479 @@ static const struct iommu_flush_ops v1_flush_ops = {
.tlb_add_page   = v1_tlb_add_page,
 };
 
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+unsigned long *count)
+{
+   unsigned long pte_mask, pg_size, cnt;
+   u64 *fpte;
+
+   pg_size  = PTE_PAGE_SIZE(*pte);
+   cnt  = PAGE_SIZE_PTE_COUNT(pg_size);
+   pte_mask = ~((cnt << 3) - 1);
+   fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+   if (page_size)
+   *page_size = pg_size;
+
+   if (count)
+   *count = cnt;
+
+   return fpte;
+}
+
+/****************************************************************************
+ *
+ * The functions below are used to create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+   while (freelist != NULL) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+   struct page *p = virt_to_page((void *)pt);
+
+   p->freelist = freelist;
+
+   return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN)						\
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
+{										\
+	unsigned long p;							\
+	u64 *pt;								\
+	int i;									\
+										\
+	pt = (u64 *)__pt;							\
+										\
+	for (i = 0; i < 512; ++i) {						\
+		/* PTE present? */						\
+		if (!IOMMU_PTE_PRESENT(pt[i]))					\
+			continue;						\
+										\
+		/* Large PTE? */						\
+		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
+		    PM_PTE_LEVEL(pt[i]) == 7)					\
+			continue;						\
+										\
+		p = (unsigned long)
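
To make the macro above easier to follow, here is roughly what one
instantiation such as DEFINE_FREE_PT_FN(l2, free_pt_page) expands to (a
sketch based on the pre-existing free_pagetable() logic this patch moves;
IOMMU_PTE_PAGE() is the existing PTE-to-virtual-address helper):

	static struct page *free_pt_l2(unsigned long __pt, struct page *freelist)
	{
		unsigned long p;
		u64 *pt = (u64 *)__pt;
		int i;

		for (i = 0; i < 512; ++i) {
			/* Skip non-present and large (level 0/7) entries */
			if (!IOMMU_PTE_PRESENT(pt[i]))
				continue;
			if (PM_PTE_LEVEL(pt[i]) == 0 ||
			    PM_PTE_LEVEL(pt[i]) == 7)
				continue;

			/* Free the next-level table this entry points to */
			p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);
			freelist = free_pt_page(p, freelist);
		}

		/* Finally queue this table page itself on the freelist */
		return free_pt_page((unsigned long)pt, freelist);
	}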

[PATCH v4 13/13] iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table

2020-12-14 Thread Suravee Suthikulpanit
Switch to using IO page table framework for AMD IOMMU v1 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  1 +
 drivers/iommu/amd/init.c  |  2 ++
 drivers/iommu/amd/iommu.c | 48 ++-
 3 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 3770b1a4d51c..91452e0ff072 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -36,6 +36,7 @@ extern void amd_iommu_disable(void);
 extern int amd_iommu_reenable(int);
 extern int amd_iommu_enable_faulting(void);
 extern int amd_iommu_guest_ir;
+extern enum io_pgtable_fmt amd_iommu_pgtable;
 
 /* IOMMUv2 specific functions */
 struct iommu_domain;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 23a790f8f550..5fb4bea14cc4 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -147,6 +147,8 @@ struct ivmd_header {
 bool amd_iommu_dump;
 bool amd_iommu_irq_remap __read_mostly;
 
+enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+
 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 1f04b251f0c6..571e8806e4a1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1901,7 +1902,7 @@ static void protection_domain_free(struct protection_domain *domain)
kfree(domain);
 }
 
-static int protection_domain_init(struct protection_domain *domain, int mode)
+static int protection_domain_init_v1(struct protection_domain *domain, int mode)
 {
u64 *pt_root = NULL;
 
@@ -1924,34 +1925,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode)
return 0;
 }
 
-static struct protection_domain *protection_domain_alloc(int mode)
+static struct protection_domain *protection_domain_alloc(unsigned int type)
 {
+   struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
+   int pgtable = amd_iommu_pgtable;
+   int mode = DEFAULT_PGTABLE_LEVEL;
+   int ret;
 
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
 
-   if (protection_domain_init(domain, mode))
+   /*
+* Force IOMMU v1 page table when iommu=pt and
+* when allocating domain for pass-through devices.
+*/
+   if (type == IOMMU_DOMAIN_IDENTITY) {
+   pgtable = AMD_IOMMU_V1;
+   mode = PAGE_MODE_NONE;
+   } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+   pgtable = AMD_IOMMU_V1;
+   }
+
+   switch (pgtable) {
+   case AMD_IOMMU_V1:
+   ret = protection_domain_init_v1(domain, mode);
+   break;
+   default:
+   ret = -EINVAL;
+   }
+
+   if (ret)
goto out_err;
 
-   return domain;
+   pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+   if (!pgtbl_ops)
+   goto out_err;
 
+   return domain;
 out_err:
kfree(domain);
-
return NULL;
 }
 
 static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 {
struct protection_domain *domain;
-   int mode = DEFAULT_PGTABLE_LEVEL;
-
-   if (type == IOMMU_DOMAIN_IDENTITY)
-   mode = PAGE_MODE_NONE;
 
-   domain = protection_domain_alloc(mode);
+   domain = protection_domain_alloc(type);
if (!domain)
return NULL;
 
@@ -2070,7 +2092,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
int prot = 0;
int ret = -EINVAL;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
+   if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+   (domain->iop.mode == PAGE_MODE_NONE))
return -EINVAL;
 
if (iommu_prot & IOMMU_READ)
@@ -2093,7 +2116,8 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
+   if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+   (domain->iop.mode == PAGE_MODE_NONE))
return 0;
 
return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
-- 
2.17.1
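
For orientation, the allocation added in protection_domain_alloc() above
follows the standard io-pgtable pattern (a condensed sketch of the hunk,
with error handling elided):

	/* Sketch: ask the framework for v1 ops; the AMD_IOMMU_V1 format's
	 * alloc callback (v1_alloc_pgtable) wires up map/unmap/iova_to_phys.
	 */
	struct io_pgtable_ops *ops;

	ops = alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, domain);
	if (!ops)
		goto out_err;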



[PATCH v4 07/13] iommu/amd: Restructure code for freeing page table

2020-12-14 Thread Suravee Suthikulpanit
Restructure the code for freeing page tables by consolidating the logic
into the v1_free_pgtable() helper function, which is called from the IO
page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  1 -
 drivers/iommu/amd/io_pgtable.c | 41 --
 drivers/iommu/amd/iommu.c  | 21 -
 3 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 1bad42a3c73c..91d098003f12 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -132,7 +132,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
 /* TODO: These are temporary and will be removed once fully transition */
-extern void free_pagetable(struct domain_pgtable *pgtable);
 extern int iommu_map_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long phys_addr,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 345e9bc81fde..dc674e79ddf0 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -163,23 +163,6 @@ static struct page *free_sub_pt(unsigned long root, int mode,
return freelist;
 }
 
-void free_pagetable(struct domain_pgtable *pgtable)
-{
-   struct page *freelist = NULL;
-   unsigned long root;
-
-   if (pgtable->mode == PAGE_MODE_NONE)
-   return;
-
-   BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
-  pgtable->mode > PAGE_MODE_6_LEVEL);
-
-   root = (unsigned long)pgtable->root;
-   freelist = free_sub_pt(root, pgtable->mode, freelist);
-
-   free_page_list(freelist);
-}
-
 void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
  u64 *root, int mode)
 {
@@ -528,6 +511,30 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
  */
 static void v1_free_pgtable(struct io_pgtable *iop)
 {
+   struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+   struct protection_domain *dom;
+   struct page *freelist = NULL;
+   unsigned long root;
+
+   if (pgtable->mode == PAGE_MODE_NONE)
+   return;
+
+   dom = container_of(pgtable, struct protection_domain, iop);
+
+   /* Update data structure */
+   amd_iommu_domain_clr_pt_root(dom);
+
+   /* Make changes visible to IOMMUs */
+   amd_iommu_domain_update(dom);
+
+   /* Page-table is not visible to IOMMU anymore, so free it */
+   BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+  pgtable->mode > PAGE_MODE_6_LEVEL);
+
+   root = (unsigned long)pgtable->root;
+   freelist = free_sub_pt(root, pgtable->mode, freelist);
+
+   free_page_list(freelist);
 }
 
 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index e823a457..37ecedce2c14 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1903,17 +1903,14 @@ static void cleanup_domain(struct protection_domain *domain)
 
 static void protection_domain_free(struct protection_domain *domain)
 {
-   struct domain_pgtable pgtable;
-
if (!domain)
return;
 
if (domain->id)
domain_id_free(domain->id);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-   amd_iommu_domain_clr_pt_root(domain);
-   free_pagetable(&pgtable);
+   if (domain->iop.pgtbl_cfg.tlb)
+   free_io_pgtable_ops(&domain->iop.iop.ops);
 
kfree(domain);
 }
@@ -2302,22 +2299,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
struct protection_domain *domain = to_pdomain(dom);
-   struct domain_pgtable pgtable;
unsigned long flags;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   /* First save pgtable configuration*/
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   /* Remove page-table from domain */
-   amd_iommu_domain_clr_pt_root(domain);
-
-   /* Make changes visible to IOMMUs */
-   amd_iommu_domain_update(domain);
-
-   /* Page-table is not visible to IOMMU anymore, so free it */
-   free_pagetable(&pgtable);
+   if (domain->iop.pgtbl_cfg.tlb)
+   free_io_pgtable_ops(&domain->iop.iop.ops);
 
spin_unlock_irqrestore(&domain->lock, flags);
 }
-- 
2.17.1
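
The net effect on teardown can be summarized in a short sketch (assembled
from the hunks above):

	/* Sketch: freeing the ops invokes the framework's free callback,
	 * i.e. v1_free_pgtable(), which detaches the root from the domain,
	 * flushes the IOMMUs, and only then releases the page-table pages.
	 */
	if (domain->iop.pgtbl_cfg.tlb)	/* ops were ever allocated */
		free_io_pgtable_ops(&domain->iop.iop.ops);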



[PATCH v4 03/13] iommu/amd: Move pt_root to struct amd_io_pgtable

2020-12-14 Thread Suravee Suthikulpanit
Move pt_root into struct amd_io_pgtable to better organize the data
structure, since it contains IO page table related information.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   | 2 +-
 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 drivers/iommu/amd/iommu.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 0817bc732d1a..b8dae3941f0f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -105,7 +105,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
 static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
-   atomic64_set(&domain->pt_root, root);
+   atomic64_set(&domain->iop.pt_root, root);
 }
 
 static inline
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5d77f34e0fda..7c971c76d685 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -497,6 +497,7 @@ struct amd_io_pgtable {
struct io_pgtable   iop;
int mode;
u64 *root;
+   atomic64_t  pt_root;/* pgtable root and pgtable mode */
 };
 
 /*
@@ -510,7 +511,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
-   atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx;/* Number of levels for GCR3 table */
u64 *gcr3_tbl;  /* Guest CR3 table */
unsigned long flags;/* flags to find out type of domain */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 45d3977d6c00..5b93536d6877 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -145,7 +145,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 struct domain_pgtable *pgtable)
 {
-   u64 pt_root = atomic64_read(&domain->pt_root);
+   u64 pt_root = atomic64_read(&domain->iop.pt_root);
 
pgtable->root = (u64 *)(pt_root & PAGE_MASK);
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-- 
2.17.1



[PATCH v4 05/13] iommu/amd: Declare functions as extern

2020-12-14 Thread Suravee Suthikulpanit
Declare the functions as extern and move their declarations to the header
file so that they can be included across multiple files. There is no
functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  3 +++
 drivers/iommu/amd/iommu.c | 39 +--
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index bf9723b35e77..bf29ab8c99f0 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -57,6 +57,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
 extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index fdb6030b505d..1b10710c91cf 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -87,7 +87,6 @@ struct iommu_cmd {
 
 struct kmem_cache *amd_iommu_irq_cache;
 
-static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
 /
@@ -1314,12 +1313,12 @@ static void domain_flush_pages(struct protection_domain *domain,
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
 {
int i;
 
@@ -1344,7 +1343,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
 
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
 }
@@ -1501,7 +1500,7 @@ static bool increase_address_space(struct protection_domain *domain,
pgtable.root  = pte;
pgtable.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/*
 * Device Table needs to be updated and flushed before the new root can
@@ -1754,8 +1753,8 @@ static int iommu_map_page(struct protection_domain *dom,
 * Updates and flushing already happened in
 * increase_address_space().
 */
-   domain_flush_tlb_pde(dom);
-   domain_flush_complete(dom);
+   amd_iommu_domain_flush_tlb_pde(dom);
+   amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
 
@@ -1998,10 +1997,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
 
/* Flush IOTLB */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
/* Wait for the flushes to finish */
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2134,9 +2133,9 @@ static int attach_device(struct device *dev,
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
 out:
spin_unlock(&dev_data->lock);
@@ -2298,7 +2297,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
domain_flush_devices(domain);
 }
 
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
 {
struct domain_pgtable pgtable;
 
@@ -2307,8 +2306,8 @@ static void update_domain(struct protection_domain *domain)
amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
-   domain_flush_tlb_pde(domain);
- 
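
With these helpers exported, the invalidation sequence that other files
will drive looks like this (a minimal usage sketch of the renamed API):

	/* Sketch: invalidate the whole domain IOTLB including PDEs, then
	 * wait until every IOMMU has completed the invalidation.
	 */
	amd_iommu_domain_flush_tlb_pde(domain);
	amd_iommu_domain_flush_complete(domain);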

[PATCH v4 09/13] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops

2020-12-14 Thread Suravee Suthikulpanit
Rename the variables in iommu_map_page() and iommu_unmap_page() to be
consistent with the parameter names used in struct io_pgtable_ops.
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index d4d131e43dcd..35dd9153e6b7 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
  * and full 64 bit address spaces.
  */
 int iommu_map_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long phys_addr,
-  unsigned long page_size,
+  unsigned long iova,
+  unsigned long paddr,
+  unsigned long size,
   int prot,
   gfp_t gfp)
 {
@@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom,
u64 __pte, *pte;
int ret, i, count;
 
-   BUG_ON(!IS_ALIGNED(bus_addr, page_size));
-   BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+   BUG_ON(!IS_ALIGNED(iova, size));
+   BUG_ON(!IS_ALIGNED(paddr, size));
 
ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
goto out;
 
-   count = PAGE_SIZE_PTE_COUNT(page_size);
-   pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+   count = PAGE_SIZE_PTE_COUNT(size);
+   pte   = alloc_pte(dom, iova, size, NULL, gfp, &updated);
 
ret = -ENOMEM;
if (!pte)
@@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom,
updated = true;
 
if (count > 1) {
-   __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
+   __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+   __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom,
 }
 
 unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long page_size)
+  unsigned long iova,
+  unsigned long size)
 {
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
 
-   BUG_ON(!is_power_of_2(page_size));
+   BUG_ON(!is_power_of_2(size));
 
unmapped = 0;
 
-   while (unmapped < page_size) {
-
-   pte = fetch_pte(dom, bus_addr, &unmap_size);
+   while (unmapped < size) {
+   pte = fetch_pte(dom, iova, &unmap_size);
 
if (pte) {
int i, count;
@@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
pte[i] = 0ULL;
}
 
-   bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
+   iova = (iova & ~(unmap_size - 1)) + unmap_size;
unmapped += unmap_size;
}
 
-- 
2.17.1
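
The renamed parameters line up with the generic callback signatures the
series adopts in the later patches; for reference, a sketch of those
signatures (assumed from the v5.10 io-pgtable headers, not part of this
patch):

	/* Sketch: the generic callbacks whose naming iommu_map_page() and
	 * iommu_unmap_page() now mirror (iova/paddr/size).
	 */
	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
			size_t size, struct iommu_iotlb_gather *gather);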



[PATCH v4 01/13] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline

2020-12-14 Thread Suravee Suthikulpanit
Move the functions to the header file to allow inclusion in other files.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h | 13 +
 drivers/iommu/amd/iommu.c | 10 --
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6b8cbdf71714..0817bc732d1a 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -102,6 +102,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
 }
 
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+   atomic64_set(&domain->pt_root, root);
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+   amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+
 extern bool translation_pre_enabled(struct amd_iommu *iommu);
 extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 struct device *dev);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b9cf59443843..7f6b0f60b958 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -161,16 +161,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
-   atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
-   amd_iommu_domain_set_pt_root(domain, 0);
-}
-
 static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode)
 {
-- 
2.17.1
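
A short usage sketch of the now-inline helpers from another compilation
unit (illustrative only; the root/mode encoding is formalized later in
the series):

	/* Sketch: publish a packed (root | mode) value, or detach it. */
	amd_iommu_domain_set_pt_root(domain, (u64)(unsigned long)root | mode);
	amd_iommu_domain_clr_pt_root(domain);	/* same as setting 0 */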



[PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support

2020-12-14 Thread Suravee Suthikulpanit
The framework allows pluggable implementations of the IO page table.
This allows the AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V3 
(https://lore.kernel.org/linux-iommu/20201004014549.16065-1-suravee.suthikulpa...@amd.com/)
  - Rebase to v5.10
  - Patch  2: Add struct iommu_flush_ops (previously in patch 13 of v3)
  - Patch  7: Consolidate logic into v1_free_pgtable() instead of amd_iommu_free_pgtable()
  - Patch 12: Check ops->[map|unmap] before calling.
  - Patch 13: Setup page table when allocating domain (instead of when attaching device).

Change from V2 
(https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
  - Patch  2: Introduce helper function io_pgtable_cfg_to_data.
  - Patch 13: Put back the struct iommu_flush_ops since patch v2 would run into
NULL pointer bug when calling free_io_pgtable_ops if not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
  - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
not currently used. (per Robin)
  - Remove unused struct iommu_flush_ops.  (patch 2/13)
  - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
    (patch 13/13)

Suravee Suthikulpanit (13):
  iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
  iommu/amd: Prepare for generic IO page table framework
  iommu/amd: Move pt_root to struct amd_io_pgtable
  iommu/amd: Convert to using amd_io_pgtable
  iommu/amd: Declare functions as extern
  iommu/amd: Move IO page table related functions
  iommu/amd: Restructure code for freeing page table
  iommu/amd: Remove amd_iommu_domain_get_pgtable
  iommu/amd: Rename variables to be consistent with struct
io_pgtable_ops
  iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
  iommu/amd: Introduce iommu_v1_iova_to_phys
  iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
  iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table

 drivers/iommu/amd/Kconfig   |   1 +
 drivers/iommu/amd/Makefile  |   2 +-
 drivers/iommu/amd/amd_iommu.h   |  22 +
 drivers/iommu/amd/amd_iommu_types.h |  43 +-
 drivers/iommu/amd/init.c|   2 +
 drivers/iommu/amd/io_pgtable.c  | 564 +++
 drivers/iommu/amd/iommu.c   | 672 
 drivers/iommu/io-pgtable.c  |   3 +
 include/linux/io-pgtable.h  |   2 +
 9 files changed, 707 insertions(+), 604 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

-- 
2.17.1



[PATCH] iommu/amd: Add sanity check for interrupt remapping table length macros

2020-12-10 Thread Suravee Suthikulpanit
Currently, macros related to the interrupt remapping table length are
defined separately. This has resulted in an oversight in which one of
the macros was missed when changing the length. To prevent this,
redefine the macros to add a built-in sanity check.

Also, rename macros to use the name of the DTE[IntTabLen] field as
specified in the AMD IOMMU specification. There is no functional change.

Suggested-by: Linus Torvalds 
Reviewed-by: Tom Lendacky 
Signed-off-by: Suravee Suthikulpanit 
Cc: Will Deacon 
Cc: Jerry Snitselaar 
Cc: Joerg Roedel 
---
 drivers/iommu/amd/amd_iommu_types.h | 19 ++-
 drivers/iommu/amd/init.c|  6 +++---
 drivers/iommu/amd/iommu.c   |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 494b42a31b7a..899ce62df3f0 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -255,11 +255,19 @@
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
-#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
 #define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (9ULL << 1)
 #define DTE_IRQ_REMAP_ENABLE1ULL
 
+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define DTE_INTTAB_ALIGNMENT128
+#define DTE_INTTABLEN_VALUE 9ULL
+#define DTE_INTTABLEN   (DTE_INTTABLEN_VALUE << 1)
+#define DTE_INTTABLEN_MASK  (0xfULL << 1)
+#define MAX_IRQS_PER_TABLE  (1 << DTE_INTTABLEN_VALUE)
+
 #define PAGE_MODE_NONE0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
@@ -409,13 +417,6 @@ extern bool amd_iommu_np_cache;
 /* Only true if all IOMMUs support device IOTLBs */
 extern bool amd_iommu_iotlb_sup;
 
-/*
- * AMD IOMMU hardware only support 512 IRTEs despite
- * the architectural limitation of 2048 entries.
- */
-#define MAX_IRQS_PER_TABLE 512
-#define IRQ_TABLE_ALIGNMENT128
-
 struct irq_remap_table {
raw_spinlock_t lock;
unsigned min_index;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 23a790f8f550..6bec8913d064 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -989,10 +989,10 @@ static bool copy_device_table(void)
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
-   int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
+   int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
if (irq_v && (int_ctl || int_tab_len)) {
if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
-   (int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   (int_tab_len != DTE_INTTABLEN)) {
pr_err("Wrong old irq remapping flag: %#x\n", devid);
return false;
}
@@ -2674,7 +2674,7 @@ static int __init early_amd_iommu_init(void)
remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
remap_cache_sz,
-   IRQ_TABLE_ALIGNMENT,
+   DTE_INTTAB_ALIGNMENT,
0, NULL);
if (!amd_iommu_irq_cache)
goto out;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b9cf59443843..f7abf16d1e3a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3191,7 +3191,7 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
dte |= iommu_virt_to_phys(table->table);
dte |= DTE_IRQ_REMAP_INTCTL;
-   dte |= DTE_IRQ_TABLE_LEN;
+   dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
 
amd_iommu_dev_table[devid].data[2] = dte;
-- 
2.17.1
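
As a quick sanity check of the arithmetic, decoding IntTabLen back out of
a DTE with the renamed macros (an illustrative sketch, not part of the
patch):

	/* Sketch: IntTabLen occupies DTE bits 4:1 and encodes log2 of the
	 * table size, so DTE_INTTABLEN_VALUE == 9 means 1 << 9 == 512 IRTEs.
	 */
	u64 dte = amd_iommu_dev_table[devid].data[2];
	u64 int_tab_len = dte & DTE_INTTABLEN_MASK;
	unsigned int nr_irtes = 1 << (int_tab_len >> 1);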



Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-10 Thread Suravee Suthikulpanit

Hi All,

On 12/10/20 1:50 AM, Will Deacon wrote:

On Wed, Dec 09, 2020 at 10:07:46AM -0800, Linus Torvalds wrote:

On Wed, Dec 9, 2020 at 6:12 AM Will Deacon  wrote:


Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix
for a fix, where the size of the interrupt remapping table was increased
but a related constant for the size of the interrupt table was forgotten.


Pulled.


Thanks.


However, why didn't this then add some sanity checking for the two
different #defines to be in sync?

IOW, something like

#define AMD_IOMMU_IRQ_TABLE_SHIFT 9

#define MAX_IRQS_PER_TABLE (1 << AMD_IOMMU_IRQ_TABLE_SHIFT)
#define DTE_IRQ_TABLE_LEN ((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1)

or whatever. Hmm?


This looks like a worthwhile change to me, but I don't have any hardware
so I've been very reluctant to make even "obvious" driver changes here.

Suravee -- please can you post a patch implementing the above?


I'll send one out ASAP.




That way this won't happen again, but perhaps equally importantly the
linkage will be more clear, and there won't be those random constants.

Naming above is probably garbage - I assume there's some actual
architectural name for that irq table length field in the DTE?


The one in the spec is even better: "IntTabLen".

Will


Thanks,
Suravee


[PATCH] iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs

2020-12-07 Thread Suravee Suthikulpanit
According to the AMD IOMMU spec, commit 73db2fc595f3
("iommu/amd: Increase interrupt remapping table limit to 512 entries")
also requires the interrupt table length (IntTabLen) in the device table
entry (DTE) to be set to 9, since the field encodes the table size as a
power of two (2^9 = 512 entries).

Fixes: 73db2fc595f3 ("iommu/amd: Increase interrupt remapping table limit to 512 entries")
Reported-by: Jerry Snitselaar 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 89647700bab2..494b42a31b7a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -257,7 +257,7 @@
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
 #define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
 #define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
+#define DTE_IRQ_TABLE_LEN   (9ULL << 1)
 #define DTE_IRQ_REMAP_ENABLE1ULL
 
 #define PAGE_MODE_NONE0x00
-- 
2.17.1



Re: [PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries

2020-12-06 Thread Suravee Suthikulpanit

Jerry,

On 12/2/20 6:53 AM, Jerry Snitselaar wrote:


Suravee Suthikulpanit @ 2020-10-14 19:50 MST:


Certain device drivers allocate IO queues on a per-cpu basis.
On the AMD EPYC platform, which can support up to 256 cpu threads,
this can exceed the current MAX_IRQ_PER_TABLE limit of 256,
and result in the error message:

 AMD-Vi: Failed to allocate IRTE

This has been observed with certain NVME devices.

AMD IOMMU hardware can actually support up to 512 interrupt
remapping table entries. Therefore, update the driver to
match the hardware limit.

Please note that this also increases the size of interrupt remapping
table to 8KB per device when using the 128-bit IRTE format.

Signed-off-by: Suravee Suthikulpanit 
---
  drivers/iommu/amd/amd_iommu_types.h | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 30a5d412255a..427484c45589 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache;
  /* Only true if all IOMMUs support device IOTLBs */
  extern bool amd_iommu_iotlb_sup;
  
-#define MAX_IRQS_PER_TABLE	256

+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define MAX_IRQS_PER_TABLE 512
  #define IRQ_TABLE_ALIGNMENT   128
  
  struct irq_remap_table {


With this change, should DTE_IRQ_TABLE_LEN be changed to 9? If I understand
the spec correctly, leaving it at 8 says the table is 256 entries long.


You are correct. Sorry I missed this part. I'll send the fix-up patch ASAP.

Thank you,
Suravee


Re: [PATCH v2] iommu/amd: Enforce 4k mapping for certain IOMMU data structures

2020-11-19 Thread Suravee Suthikulpanit

Will,

To answer your questions from v1 thread.

On 11/18/20 5:57 AM, Will Deacon wrote:
> On 11/5/20 9:58 PM, Suravee Suthikulpanit wrote:
>> AMD IOMMU requires 4k-aligned pages for the event log, the PPR log,
>> and the completion wait write-back regions. However, when allocating
>> the pages, they could be part of large mapping (e.g. 2M) page.
>> This causes #PF due to the SNP RMP hardware enforces the check based
>> on the page level for these data structures.
>
> Please could you include an example backtrace here?

Unfortunately, we don't actually have the backtrace available here.
This information is based on the SEV-SNP specification.

>> So, fix by calling set_memory_4k() on the allocated pages.
>
> I think I'm missing something here. set_memory_4k() will break the kernel
> linear mapping up into page granular mappings, but the IOMMU isn't using
> that mapping, right?

That's correct. This does not affect the IOMMU, but it affects the PSP FW.

> It's just using the physical address returned by iommu_virt_to_phys(), so why 
does it matter?
>
> Just be nice to capture some of this rationale in the log, especially as
> I'm not familiar with this device.

According to the AMD SEV-SNP white paper 
(https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf), 
the Reverse Map Table (RMP) contains one entry for every 4K page of DRAM that may be used by the VM. In this case, the 
pages allocated by the IOMMU driver are added as 4K entries in the RMP table by the SEV-SNP FW.


During the page table walk, the RMP checks if the page is owned by the hypervisor. Without calling set_memory_4k() to
break the mapping up into 4K pages, pages could end up being part of a large mapping (e.g. a 2M page), in which case
the access would be denied, resulting in a #PF.
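
In code, the fix under discussion boils down to the following pattern (a
sketch; the posted v2 patch wraps it in iommu_alloc_4k_pages() and gates
it on FEATURE_SNP):

	void *buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);

	/* Split any huge linear mapping covering buf into 4K PTEs so the
	 * RMP check sees a 4K page level for these IOMMU structures.
	 */
	if (buf && set_memory_4k((unsigned long)buf, 1 << order)) {
		free_pages((unsigned long)buf, order);
		buf = NULL;
	}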


>> Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait 
write-back semaphore")
>
> I couldn't figure out how that commit could cause this problem. Please can
> you explain that to me?

Hope this helps clarify. If so, I'll update the commit log and send out V3.

Thanks,
Suravee


Re: [PATCH] iommu/amd: Enforce 4k mapping for certain IOMMU data structures

2020-11-19 Thread Suravee Suthikulpanit

Will,

I have already submitted v2 of this patch. Let me move the discussion there 
instead ...
(https://lore.kernel.org/linux-iommu/20201105145832.3065-1-suravee.suthikulpa...@amd.com/)

Suravee

On 11/18/20 5:57 AM, Will Deacon wrote:

On Wed, Oct 28, 2020 at 11:18:24PM +, Suravee Suthikulpanit wrote:

AMD IOMMU requires 4k-aligned pages for the event log, the PPR log,
and the completion wait write-back regions. However, when allocating
the pages, they could be part of large mapping (e.g. 2M) page.
This causes #PF due to the SNP RMP hardware enforces the check based
on the page level for these data structures.


Please could you include an example backtrace here?


So, fix by calling set_memory_4k() on the allocated pages.


I think I'm missing something here. set_memory_4k() will break the kernel
linear mapping up into page granular mappings, but the IOMMU isn't using
that mapping, right? It's just using the physical address returned by
iommu_virt_to_phys(), so why does it matter?

Just be nice to capture some of this rationale in the log, especially as
I'm not familiar with this device.


Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back 
semaphore")


I couldn't figure out how that commit could cause this problem. Please can
you explain that to me?

Cheers,

Will



Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers

2020-11-18 Thread Suravee Suthikulpanit

Tglx,

On 11/18/20 9:06 PM, Thomas Gleixner wrote:

Suravee,

On Wed, Nov 18 2020 at 17:29, Suravee Suthikulpanit wrote:

On 11/17/20 9:00 AM, Suravee Suthikulpanit wrote:

I might need your help debugging this issue. I'm seeing the following error:

[   14.005937] irq 29, desc: d200500b, depth: 0, count: 0, unhandled: 0
[   14.006234] ->handle_irq():  eab4b6eb, handle_bad_irq+0x0/0x230
[   14.006234] ->irq_data.chip(): 1cce6d6b, intcapxt_controller+0x0/0x120
[   14.006234] ->action(): 83bfd734
[   14.006234] ->action->handler(): 94806345, amd_iommu_int_handler+0x0/0x10
[   14.006234] unexpected IRQ trap at vector 1d

Do you have any idea what might have gone wrong here?


Yes. This lacks setting up the low level flow handler. Delta patch
below.

Thanks,

 tglx
---
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2033,6 +2033,7 @@ static int intcapxt_irqdomain_alloc(stru
  
 		irqd->chip = &intcapxt_controller;
 		irqd->chip_data = info->data;
+		__irq_set_handler(i, handle_edge_irq, 0, "edge");
 	}
  
  	return ret;




Yes, this fixes the issue. Now I can receive the IOMMU event log interrupts for IO_PAGE_FAULT event, which is triggered 
using the injection interface via debugfs.


Thanks,
Suravee


Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers

2020-11-18 Thread Suravee Suthikulpanit

David

On 11/17/20 9:00 AM, Suravee Suthikulpanit wrote:

David,

On 11/13/20 10:14 PM, David Woodhouse wrote:

On Wed, 2020-11-11 at 14:30 -0600, Tom Lendacky wrote:

I had trouble cloning your tree for some reason, so just took the top
three patches and applied them to the tip tree. This all appears to be
working. I'll let the IOMMU experts take a closer look (adding Suravee).


Thanks. I see Thomas has taken the first two into the tip.git x86/apic
branch already, so we're just looking for an ack on the third. Which is
this one...

 From 49ee4fa51b8c06d14b7c4c74d15a7d76f865a8ea Mon Sep 17 00:00:00 2001
From: David Woodhouse 
Date: Wed, 11 Nov 2020 12:09:01 +
Subject: [PATCH] iommu/amd: Fix IOMMU interrupt generation in X2APIC mode

The AMD IOMMU has two modes for generating its own interrupts.

The first is very much based on PCI MSI, and can be configured by Linux
precisely that way. But like legacy unmapped PCI MSI it's limited to
8 bits of APIC ID.

The second method does not use PCI MSI at all in hardware, and instead
configures the INTCAPXT registers in the IOMMU directly with the APIC ID
and vector.

In the latter case, the IOMMU driver would still use pci_enable_msi(),
read back (through MMIO) the MSI message that Linux wrote to the PCI MSI
table, then swizzle those bits into the appropriate register.

Historically, this worked because __irq_compose_msi_msg() would silently
generate an invalid MSI message with the high bits of the APIC ID in the
high bits of the MSI address. That hack was intended only for the Intel
IOMMU, and I recently enforced that, introducing a warning in
__irq_msi_compose_msg() if it was invoked with an APIC ID above 255.

Fix the AMD IOMMU not to depend on that hack any more, by having its own
irqdomain and directly putting the bits from the irq_cfg into the right
place in its ->activate() method.

Fixes: 47bea873cf80 ("x86/msi: Only use high bits of MSI address for DMAR unit")
Signed-off-by: David Woodhouse 


I'm still working on testing this series using IO_PAGE_FAULT injection to trigger the IOMMU interrupts. I am still 
debugging some issues, and I'll keep you updated on the findings.


Thanks,
Suravee


I might need your help debugging this issue. I'm seeing the following error:

[   14.005937] irq 29, desc: d200500b, depth: 0, count: 0, unhandled: 0
[   14.006234] ->handle_irq():  eab4b6eb, handle_bad_irq+0x0/0x230
[   14.006234] ->irq_data.chip(): 1cce6d6b, intcapxt_controller+0x0/0x120
[   14.006234] ->action(): 83bfd734
[   14.006234] ->action->handler(): 94806345, amd_iommu_int_handler+0x0/0x10
[   14.006234] unexpected IRQ trap at vector 1d

Do you have any idea what might have gone wrong here?

Thanks,
Suravee


Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers

2020-11-16 Thread Suravee Suthikulpanit

David,

On 11/13/20 10:14 PM, David Woodhouse wrote:

On Wed, 2020-11-11 at 14:30 -0600, Tom Lendacky wrote:

I had trouble cloning your tree for some reason, so just took the top
three patches and applied them to the tip tree. This all appears to be
working. I'll let the IOMMU experts take a closer look (adding Suravee).


Thanks. I see Thomas has taken the first two into the tip.git x86/apic
branch already, so we're just looking for an ack on the third. Which is
this one...

 From 49ee4fa51b8c06d14b7c4c74d15a7d76f865a8ea Mon Sep 17 00:00:00 2001
From: David Woodhouse 
Date: Wed, 11 Nov 2020 12:09:01 +
Subject: [PATCH] iommu/amd: Fix IOMMU interrupt generation in X2APIC mode

The AMD IOMMU has two modes for generating its own interrupts.

The first is very much based on PCI MSI, and can be configured by Linux
precisely that way. But like legacy unmapped PCI MSI it's limited to
8 bits of APIC ID.

The second method does not use PCI MSI at all in hardware, and instead
configures the INTCAPXT registers in the IOMMU directly with the APIC ID
and vector.

In the latter case, the IOMMU driver would still use pci_enable_msi(),
read back (through MMIO) the MSI message that Linux wrote to the PCI MSI
table, then swizzle those bits into the appropriate register.

Historically, this worked because __irq_compose_msi_msg() would silently
generate an invalid MSI message with the high bits of the APIC ID in the
high bits of the MSI address. That hack was intended only for the Intel
IOMMU, and I recently enforced that, introducing a warning in
__irq_msi_compose_msg() if it was invoked with an APIC ID above 255.

Fix the AMD IOMMU not to depend on that hack any more, by having its own
irqdomain and directly putting the bits from the irq_cfg into the right
place in its ->activate() method.

Fixes: 47bea873cf80 ("x86/msi: Only use high bits of MSI address for DMAR unit")
Signed-off-by: David Woodhouse 


I'm still working on testing this series using IO_PAGE_FAULT injection to trigger the IOMMU interrupts. I am still 
debugging some issues, and I'll keep you updated on the findings.


Thanks,
Suravee
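
To illustrate the second mode described above, the irqdomain ->activate()
path boils down to something like this (a hypothetical sketch; the union
layout and register offset are assumptions based on the description, not
taken from the posted diff):

	/* Hypothetical: pack the APIC destination ID and vector straight
	 * into an INTCAPXT register image, bypassing the PCI MSI capability.
	 */
	union intcapxt xt;

	xt.capxt = 0ULL;
	xt.vector = cfg->vector;
	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
	xt.destid_24_31 = cfg->dest_apicid >> 24;

	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);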


Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support

2020-11-12 Thread Suravee Suthikulpanit

Joerg,

Please do not include V3. I am working on a V4 to resubmit.

Thank you,
Suravee

On 11/11/20 10:10 AM, Suravee Suthikulpanit wrote:

Hi Joerg,

Do you have any update on this series?

Thanks,
Suravee

On 11/2/20 10:16 AM, Suravee Suthikulpanit wrote:

Joerg,

You asked me to remind you to pull this into linux-next.

Thanks,
Suravee

On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote:

The framework allows callable implementation of IO page table.
This allows AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V2 
(https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
   - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data.
   - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
   - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
 not currently used. (per Robin)
   - Remove unused struct iommu_flush_ops.  (patch 2/13)
   - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
     (patch 13/13)

Suravee Suthikulpanit (14):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Introduce IOMMU flush callbacks
   iommu/amd: Adopt IO page table framework

  drivers/iommu/amd/Kconfig   |   1 +
  drivers/iommu/amd/Makefile  |   2 +-
  drivers/iommu/amd/amd_iommu.h   |  22 +
  drivers/iommu/amd/amd_iommu_types.h |  43 +-
  drivers/iommu/amd/io_pgtable.c  | 564 
  drivers/iommu/amd/iommu.c   | 646 +++-
  drivers/iommu/io-pgtable.c  |   3 +
  include/linux/io-pgtable.h  |   2 +
  8 files changed, 691 insertions(+), 592 deletions(-)
  create mode 100644 drivers/iommu/amd/io_pgtable.c



Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support

2020-11-10 Thread Suravee Suthikulpanit

Hi Joerg,

Do you have any update on this series?

Thanks,
Suravee

On 11/2/20 10:16 AM, Suravee Suthikulpanit wrote:

Joerg,

You asked me to remind you to pull this into linux-next.

Thanks,
Suravee

On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote:

The framework allows callable implementation of IO page table.
This allows AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V2 
(https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
   - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data.
   - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
   - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
 not currently used. (per Robin)
   - Remove unused struct iommu_flush_ops.  (patch 2/13)
   - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
     (patch 13/13)

Suravee Suthikulpanit (14):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Introduce IOMMU flush callbacks
   iommu/amd: Adopt IO page table framework

  drivers/iommu/amd/Kconfig   |   1 +
  drivers/iommu/amd/Makefile  |   2 +-
  drivers/iommu/amd/amd_iommu.h   |  22 +
  drivers/iommu/amd/amd_iommu_types.h |  43 +-
  drivers/iommu/amd/io_pgtable.c  | 564 
  drivers/iommu/amd/iommu.c   | 646 +++-
  drivers/iommu/io-pgtable.c  |   3 +
  include/linux/io-pgtable.h  |   2 +
  8 files changed, 691 insertions(+), 592 deletions(-)
  create mode 100644 drivers/iommu/amd/io_pgtable.c



[PATCH v2] iommu/amd: Enforce 4k mapping for certain IOMMU data structures

2020-11-05 Thread Suravee Suthikulpanit
AMD IOMMU requires 4k-aligned pages for the event log, the PPR log,
and the completion wait write-back regions. However, when allocating
the pages, they could be part of a large (e.g. 2M) mapping.
This causes a #PF because the SNP RMP hardware enforces the check based
on the page level for these data structures.

So, fix by calling set_memory_4k() on the allocated pages.

Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore")
Cc: Brijesh Singh 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 82e4af8f09bb..23a790f8f550 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -672,11 +673,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 }
 
+static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
+gfp_t gfp, size_t size)
+{
+   int order = get_order(size);
+   void *buf = (void *)__get_free_pages(gfp, order);
+
+   if (buf &&
+   iommu_feature(iommu, FEATURE_SNP) &&
+   set_memory_4k((unsigned long)buf, (1 << order))) {
+   free_pages((unsigned long)buf, order);
+   buf = NULL;
+   }
+
+   return buf;
+}
+
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-   iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+   iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ EVT_BUFFER_SIZE);
 
return iommu->evt_buf ? 0 : -ENOMEM;
 }
@@ -715,8 +732,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_ppr_log(struct amd_iommu *iommu)
 {
-   iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
+   iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ PPR_LOG_SIZE);
 
return iommu->ppr_log ? 0 : -ENOMEM;
 }
@@ -838,7 +855,7 @@ static int iommu_init_ga(struct amd_iommu *iommu)
 
 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 {
-   iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL);
+   iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
 
return iommu->cmd_sem ? 0 : -ENOMEM;
 }
-- 
2.17.1



Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support

2020-11-01 Thread Suravee Suthikulpanit

Joerg,

You asked me to remind you to pull this into linux-next.

Thanks,
Suravee

On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote:

The framework allows callable implementation of IO page table.
This allows AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V2 
(https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
   - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data.
   - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
   - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
 not currently used. (per Robin)
   - Remove unused struct iommu_flush_ops.  (patch 2/13)
   - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
     (patch 13/13)

Suravee Suthikulpanit (14):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Introduce IOMMU flush callbacks
   iommu/amd: Adopt IO page table framework

  drivers/iommu/amd/Kconfig   |   1 +
  drivers/iommu/amd/Makefile  |   2 +-
  drivers/iommu/amd/amd_iommu.h   |  22 +
  drivers/iommu/amd/amd_iommu_types.h |  43 +-
  drivers/iommu/amd/io_pgtable.c  | 564 
  drivers/iommu/amd/iommu.c   | 646 +++-
  drivers/iommu/io-pgtable.c  |   3 +
  include/linux/io-pgtable.h  |   2 +
  8 files changed, 691 insertions(+), 592 deletions(-)
  create mode 100644 drivers/iommu/amd/io_pgtable.c



[PATCH] iommu/amd: Enforce 4k mapping for certain IOMMU data structures

2020-10-28 Thread Suravee Suthikulpanit
The AMD IOMMU requires 4k-aligned pages for the event log, the PPR log,
and the completion wait write-back regions. However, when allocating
the pages, they could be part of a large (e.g. 2M) mapping.
This causes a #PF, because the SNP RMP hardware enforces the check based
on the page level for these data structures.

So, fix this by calling set_memory_4k() on the allocated pages.

Fixes: c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore")
Cc: Brijesh Singh 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/init.c | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 82e4af8f09bb..75dc30226a7c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -672,11 +673,22 @@ static void __init free_command_buffer(struct amd_iommu 
*iommu)
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 }
 
+static void *__init iommu_alloc_4k_pages(gfp_t gfp, size_t size)
+{
+   void *buf;
+   int order = get_order(size);
+
+   buf = (void *)__get_free_pages(gfp, order);
+   if (!buf)
+   return buf;
+   return set_memory_4k((unsigned long)buf, (1 << order)) ? NULL : buf;
+}
+
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-   iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+   iommu->evt_buf = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO,
+ EVT_BUFFER_SIZE);
 
return iommu->evt_buf ? 0 : -ENOMEM;
 }
@@ -715,8 +727,8 @@ static void __init free_event_buffer(struct amd_iommu 
*iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_ppr_log(struct amd_iommu *iommu)
 {
-   iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
+   iommu->ppr_log = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO,
+ PPR_LOG_SIZE);
 
return iommu->ppr_log ? 0 : -ENOMEM;
 }
@@ -838,7 +850,7 @@ static int iommu_init_ga(struct amd_iommu *iommu)
 
 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 {
-   iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL);
+   iommu->cmd_sem = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO, 1);
 
return iommu->cmd_sem ? 0 : -ENOMEM;
 }
-- 
2.17.1



Re: [PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries

2020-10-25 Thread Suravee Suthikulpanit

Hi Joerg,

Do you have any concerns regarding this patch?

Thanks,
Suravee

On 10/15/20 9:50 AM, Suravee Suthikulpanit wrote:

Certain device drivers allocate IO queues on a per-CPU basis.
On the AMD EPYC platform, which can support up to 256 CPU threads,
this can exceed the current MAX_IRQS_PER_TABLE limit of 256,
and result in the error message:

 AMD-Vi: Failed to allocate IRTE

This has been observed with certain NVME devices.

The AMD IOMMU hardware can actually support up to 512 interrupt
remapping table entries. Therefore, update the driver to
match the hardware limit.

Please note that this also increases the size of interrupt remapping
table to 8KB per device when using the 128-bit IRTE format.

Signed-off-by: Suravee Suthikulpanit 
---
  drivers/iommu/amd/amd_iommu_types.h | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 30a5d412255a..427484c45589 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache;
  /* Only true if all IOMMUs support device IOTLBs */
  extern bool amd_iommu_iotlb_sup;
  
-#define MAX_IRQS_PER_TABLE	256

+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define MAX_IRQS_PER_TABLE 512
  #define IRQ_TABLE_ALIGNMENT   128
  
  struct irq_remap_table {




[PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries

2020-10-14 Thread Suravee Suthikulpanit
Certain device drivers allocate IO queues on a per-CPU basis.
On the AMD EPYC platform, which can support up to 256 CPU threads,
this can exceed the current MAX_IRQS_PER_TABLE limit of 256,
and result in the error message:

AMD-Vi: Failed to allocate IRTE

This has been observed with certain NVME devices.

The AMD IOMMU hardware can actually support up to 512 interrupt
remapping table entries. Therefore, update the driver to
match the hardware limit.

Please note that this also increases the size of interrupt remapping
table to 8KB per device when using the 128-bit IRTE format.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 30a5d412255a..427484c45589 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache;
 /* Only true if all IOMMUs support device IOTLBs */
 extern bool amd_iommu_iotlb_sup;
 
-#define MAX_IRQS_PER_TABLE 256
+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define MAX_IRQS_PER_TABLE 512
 #define IRQ_TABLE_ALIGNMENT128
 
 struct irq_remap_table {
-- 
2.17.1
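
The "8KB per device" figure in the commit message follows directly from
the entry size. The quick check below assumes the 128-bit IRTE format is
16 bytes per entry (as for struct irte_ga) and the legacy 32-bit format
is 4 bytes:

#include <stdio.h>

#define MAX_IRQS_PER_TABLE 512

int main(void)
{
	/* 128-bit IRTE: 16 bytes/entry; legacy 32-bit IRTE: 4 bytes/entry. */
	printf("128-bit IRTE table: %d bytes (%d KB) per device\n",
	       MAX_IRQS_PER_TABLE * 16, MAX_IRQS_PER_TABLE * 16 / 1024);
	printf(" 32-bit IRTE table: %d bytes (%d KB) per device\n",
	       MAX_IRQS_PER_TABLE * 4, MAX_IRQS_PER_TABLE * 4 / 1024);
	return 0;
}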



Re: [PATCH] KVM: SVM: Initialize prev_ga_tag before use

2020-10-09 Thread Suravee Suthikulpanit

Paolo,

Are there any issues or concerns about this patch?

Thank you,
Suravee

On 10/4/20 6:27 AM, Suravee Suthikulpanit wrote:

The function amd_ir_set_vcpu_affinity makes use of the parameter struct
amd_iommu_pi_data.prev_ga_tag to determine if it should delete struct
amd_iommu_pi_data from a list when not running in AVIC mode.

However, prev_ga_tag is initialized only when AVIC is enabled. A non-zero
uninitialized value can cause an unintended code path to be taken, which
ends up making use of struct vcpu_svm.ir_list and ir_list_lock without
them being initialized (since they are intended only for the AVIC case).

This triggers a NULL pointer dereference bug in the function
svm_ir_list_del with the following call trace:

 svm_update_pi_irte+0x3c2/0x550 [kvm_amd]
 ? proc_create_single_data+0x41/0x50
 kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm]
 __connect+0x5f/0xb0 [irqbypass]
 irq_bypass_register_producer+0xf8/0x120 [irqbypass]
 vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci]
 vfio_msi_set_block+0x77/0xe0 [vfio_pci]
 vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci]
 vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci]
 vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci]
 ? alloc_file_pseudo+0xa5/0x100
 vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
 ? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
 __x64_sys_ioctl+0x96/0xd0
 do_syscall_64+0x37/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Therefore, initialize prev_ga_tag to zero before use. This should be safe
because ga_tag value 0 is invalid (see function avic_vm_init).

Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into 
avic_init_vcpu()")
Signed-off-by: Suravee Suthikulpanit 
---
  arch/x86/kvm/svm/avic.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ac830cd50830..381d22daa4ac 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -868,6 +868,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int 
host_irq,
 * - Tell IOMMU to use legacy mode for this interrupt.
 * - Retrieve ga_tag of prior interrupt remapping data.
 */
+   pi.prev_ga_tag = 0;
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
  



[PATCH v3 10/14] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable

2020-10-03 Thread Suravee Suthikulpanit
Refactor to simplify the fetch_pte function. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 13 +++--
 drivers/iommu/amd/iommu.c  |  4 +++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2059e64fdc53..69996e57fae2 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom,
 extern unsigned long iommu_unmap_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long page_size);
-extern u64 *fetch_pte(struct protection_domain *domain,
+extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
  unsigned long address,
  unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 989db64a89a7..93ff8cb452ed 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct protection_domain *domain,
+u64 *fetch_pte(struct amd_io_pgtable *pgtable,
   unsigned long address,
   unsigned long *page_size)
 {
@@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain,
 
*page_size = 0;
 
-   if (address > PM_LEVEL_SIZE(domain->iop.mode))
+   if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
 
-   level  =  domain->iop.mode - 1;
-   pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+   level  =  pgtable->mode - 1;
+   pte= &pgtable->root[PM_LEVEL_INDEX(level, address)];
*page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
while (level > 0) {
@@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
   unsigned long iova,
   unsigned long size)
 {
+   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
@@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
unmapped = 0;
 
while (unmapped < size) {
-   pte = fetch_pte(dom, iova, &unmap_size);
-
+   pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
int i, count;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3f6ede1e572c..87cea1cde414 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct 
iommu_domain *dom,
  dma_addr_t iova)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
 
if (domain->iop.mode == PAGE_MODE_NONE)
return iova;
 
-   pte = fetch_pte(domain, iova, &pte_pgsize);
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
-- 
2.17.1



[PATCH v3 13/14] iommu/amd: Introduce IOMMU flush callbacks

2020-10-03 Thread Suravee Suthikulpanit
Add TLB flush callback functions, which are used by the IO
page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index d8b329aa0bb2..3c2faa47ea5d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -514,6 +514,33 @@ static phys_addr_t iommu_v1_iova_to_phys(struct 
io_pgtable_ops *ops, unsigned lo
 /*
  * ----------------------------------------------------
  */
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_flush_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+unsigned long iova, size_t granule,
+void *cookie)
+{
+}
+
+const struct iommu_flush_ops v1_flush_ops = {
+   .tlb_flush_all  = v1_tlb_flush_all,
+   .tlb_flush_walk = v1_tlb_flush_walk,
+   .tlb_flush_leaf = v1_tlb_flush_leaf,
+   .tlb_add_page   = v1_tlb_add_page,
+};
+
 static void v1_free_pgtable(struct io_pgtable *iop)
 {
 }
@@ -526,6 +553,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct 
io_pgtable_cfg *cfg, void *coo
pgtable->iop.ops.unmap= iommu_v1_unmap_page;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
 
+   cfg->tlb = &v1_flush_ops;
+
return &pgtable->iop;
 }
 
-- 
2.17.1
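
The reason even empty callbacks are needed (see the v2->v3 changelog
note about a NULL pointer bug around free_io_pgtable_ops) is that the
generic layer calls through cfg->tlb without checking it. Below is a
trimmed, self-contained model of that call path; the struct layouts are
reduced stand-ins for the real ones in include/linux/io-pgtable.h:

#include <stddef.h>

struct iommu_flush_ops {
	void (*tlb_flush_all)(void *cookie);
};

struct io_pgtable_cfg {
	const struct iommu_flush_ops *tlb;
};

struct io_pgtable {
	struct io_pgtable_cfg cfg;
	void *cookie;
};

/* Paraphrase of the framework helper: no NULL check on cfg.tlb. */
static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
{
	iop->cfg.tlb->tlb_flush_all(iop->cookie);
}

static void noop_flush_all(void *cookie) { (void)cookie; }

int main(void)
{
	static const struct iommu_flush_ops ops = {
		.tlb_flush_all = noop_flush_all,
	};
	struct io_pgtable iop = { .cfg = { .tlb = &ops }, .cookie = NULL };

	io_pgtable_tlb_flush_all(&iop);	/* fine with the no-ops installed */
	/* with iop.cfg.tlb == NULL the same call would dereference NULL */
	return 0;
}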



[PATCH v3 11/14] iommu/amd: Introduce iommu_v1_iova_to_phys

2020-10-03 Thread Suravee Suthikulpanit
This implements iova_to_phys for the AMD IOMMU v1 page table,
which will be used by the IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 22 ++
 drivers/iommu/amd/iommu.c  | 16 +---
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 93ff8cb452ed..7841e5e1e563 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
return unmapped;
 }
 
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned 
long iova)
+{
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+   unsigned long offset_mask, pte_pgsize;
+   u64 *pte, __pte;
+
+   if (pgtable->mode == PAGE_MODE_NONE)
+   return iova;
+
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+   if (!pte || !IOMMU_PTE_PRESENT(*pte))
+   return 0;
+
+   offset_mask = pte_pgsize - 1;
+   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
+
+   return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
 /*
  * ----------------------------------------------------
  */
@@ -505,6 +525,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct 
io_pgtable_cfg *cfg, void *coo
 {
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
 
+   pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+
return &pgtable->iop;
 }
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 87cea1cde414..9a1a16031e00 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct 
iommu_domain *dom,
 {
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
-   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-   unsigned long offset_mask, pte_pgsize;
-   u64 *pte, __pte;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
-   return iova;
-
-   pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
-   if (!pte || !IOMMU_PTE_PRESENT(*pte))
-   return 0;
-
-   offset_mask = pte_pgsize - 1;
-   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
-
-   return (__pte & ~offset_mask) | (iova & offset_mask);
+   return ops->iova_to_phys(ops, iova);
 }
 
 static bool amd_iommu_capable(enum iommu_cap cap)
-- 
2.17.1
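
A worked example of the offset-mask arithmetic in iommu_v1_iova_to_phys()
above, assuming a 2 MiB PTE; the addresses are made up for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long pte_pgsize  = 2UL << 20;		/* 2 MiB mapping     */
	unsigned long offset_mask = pte_pgsize - 1;	/* 0x1fffff          */
	unsigned long pte_paddr   = 0x40000000UL;	/* frame base in PTE */
	unsigned long iova        = 0x12345678UL;

	/* High bits come from the PTE, low bits from the IOVA. */
	unsigned long phys = (pte_paddr & ~offset_mask) | (iova & offset_mask);

	printf("phys = 0x%lx\n", phys);			/* 0x40145678 */
	return 0;
}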



[PATCH v3 06/14] iommu/amd: Move IO page table related functions

2020-10-03 Thread Suravee Suthikulpanit
Prepare to migrate to the IO page table framework.
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  18 ++
 drivers/iommu/amd/io_pgtable.c | 473 
 drivers/iommu/amd/iommu.c  | 476 +
 3 files changed, 493 insertions(+), 474 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8b7be9171030..ee7ff4d827e1 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -122,4 +122,22 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
+/* TODO: These are temporary and will be removed once fully transition */
+extern void free_pagetable(struct domain_pgtable *pgtable);
+extern int iommu_map_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long phys_addr,
+ unsigned long page_size,
+ int prot,
+ gfp_t gfp);
+extern unsigned long iommu_unmap_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long page_size);
+extern u64 *fetch_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long *page_size);
+extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+struct domain_pgtable *pgtable);
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+u64 *root, int mode);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 6b2de9e467d9..c11355afe624 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -23,6 +23,479 @@
 #include "amd_iommu_types.h"
 #include "amd_iommu.h"
 
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+unsigned long *count)
+{
+   unsigned long pte_mask, pg_size, cnt;
+   u64 *fpte;
+
+   pg_size  = PTE_PAGE_SIZE(*pte);
+   cnt  = PAGE_SIZE_PTE_COUNT(pg_size);
+   pte_mask = ~((cnt << 3) - 1);
+   fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+   if (page_size)
+   *page_size = pg_size;
+
+   if (count)
+   *count = cnt;
+
+   return fpte;
+}
+
+/****************************************************************************
+ *
+ * The functions below are used the create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+   while (freelist != NULL) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+   struct page *p = virt_to_page((void *)pt);
+
+   p->freelist = freelist;
+
+   return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN)						\
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
+{										\
+	unsigned long p;							\
+	u64 *pt;								\
+	int i;									\
+										\
+	pt = (u64 *)__pt;							\
+										\
+	for (i = 0; i < 512; ++i) {						\
+		/* PTE present? */						\
+		if (!IOMMU_PTE_PRESENT(pt[i]))					\
+			continue;						\
+										\
+		/* Large PTE? */						\
+		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
+		    PM_PTE_LEVEL(pt[i]) == 7)					\
+			continue;						\
+										\
+		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);
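
A worked example of the pointer arithmetic in first_pte_l7() above,
assuming a 2 MiB large mapping: such a mapping is backed by 512
replicated 8-byte PTEs, so the first PTE of the group is found by
aligning the PTE address down to 512 * 8 = 4096 bytes (the PTE address
below is made up):

#include <stdio.h>

int main(void)
{
	unsigned long cnt      = 512;			/* PTEs backing 2 MiB */
	unsigned long pte_mask = ~((cnt << 3) - 1);	/* align down to 4096 */
	unsigned long pte_addr = 0x12345a08UL;		/* made-up address    */

	printf("first pte of the group: 0x%lx\n", pte_addr & pte_mask);
	/* prints 0x12345000 */
	return 0;
}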

[PATCH v3 14/14] iommu/amd: Adopt IO page table framework

2020-10-03 Thread Suravee Suthikulpanit
Switch to using the IO page table framework for the AMD IOMMU v1 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/iommu.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 77f44b927ae7..6f8316206fb8 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1573,6 +1574,22 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
return ret;
 }
 
+struct io_pgtable_ops *
+amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
+  struct protection_domain *domain)
+{
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+   domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) {
+   .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
+   .ias= IOMMU_IN_ADDR_BIT_SIZE,
+   .oas= IOMMU_OUT_ADDR_BIT_SIZE,
+   .iommu_dev  = &iommu->dev->dev,
+   };
+
+   return alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, 
domain);
+}
+
 /*
  * If a device is not yet associated with a domain, this function makes the
  * device visible in the domain
@@ -1580,6 +1597,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
 static int attach_device(struct device *dev,
 struct protection_domain *domain)
 {
+   struct io_pgtable_ops *pgtbl_ops;
struct iommu_dev_data *dev_data;
struct pci_dev *pdev;
unsigned long flags;
@@ -1623,6 +1641,12 @@ static int attach_device(struct device *dev,
 skip_ats_check:
ret = 0;
 
+   pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain);
+   if (!pgtbl_ops) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
do_attach(dev_data, domain);
 
/*
@@ -1958,6 +1982,8 @@ static void amd_iommu_domain_free(struct iommu_domain 
*dom)
if (domain->dev_cnt > 0)
cleanup_domain(domain);
 
+   free_io_pgtable_ops(&domain->iop.iop.ops);
+
BUG_ON(domain->dev_cnt != 0);
 
if (!dom)
-- 
2.17.1



[PATCH v3 12/14] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page

2020-10-03 Thread Suravee Suthikulpanit
These implement map and unmap for the AMD IOMMU v1 page table, which
will be used by the IO page table framework.

Also clean up unused extern function declarations.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  | 13 -
 drivers/iommu/amd/io_pgtable.c | 25 -
 drivers/iommu/amd/iommu.c  |  7 ---
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 69996e57fae2..2e8dc2a1ec0f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
-/* TODO: These are temporary and will be removed once fully transition */
-extern int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp);
-extern unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size);
-extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 7841e5e1e563..d8b329aa0bb2 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-  unsigned long address,
-  unsigned long *page_size)
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
 {
int level;
u64 *pte;
@@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, 
struct page *freelist)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-int iommu_map_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long paddr,
-  unsigned long size,
-  int prot,
-  gfp_t gfp)
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
+   struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
struct page *freelist = NULL;
bool updated = false;
u64 __pte, *pte;
@@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom,
return ret;
 }
 
-unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long size)
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
 {
-   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
@@ -525,6 +522,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct 
io_pgtable_cfg *cfg, void *coo
 {
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
 
+   pgtable->iop.ops.map  = iommu_v1_map_page;
+   pgtable->iop.ops.unmap= iommu_v1_unmap_page;
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
 
return &pgtable->iop;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9a1a16031e00..77f44b927ae7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, 
unsigned long iova,
 gfp_t gfp)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
int ret;
 
@@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, 
unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
 
-   ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
+   ret = ops->map(o
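
The hunk above replaces the direct iommu_map_page() call with a call
through the installed ops. The snippet below is a reduced, compilable
model of how the core drives these callbacks; the struct definition is a
trimmed stand-in for struct io_pgtable_ops of this kernel generation,
and map_then_unmap() is purely illustrative:

#include <stddef.h>

typedef unsigned long phys_addr_t;
typedef unsigned int gfp_t;
struct iommu_iotlb_gather;

/* Trimmed stand-in for struct io_pgtable_ops. */
struct io_pgtable_ops {
	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
			size_t size, struct iommu_iotlb_gather *gather);
	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
				    unsigned long iova);
};

/* After this patch the core no longer calls iommu_map_page() directly;
 * everything goes through the installed ops, roughly like this:
 */
int map_then_unmap(struct io_pgtable_ops *ops, unsigned long iova,
		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	int ret = ops->map(ops, iova, paddr, size, prot, gfp);

	if (ret)
		return ret;

	/* Tear down again; TLB gather is omitted for brevity. */
	return ops->unmap(ops, iova, size, NULL) == size ? 0 : -1;
}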

[PATCH v3 09/14] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops

2020-10-03 Thread Suravee Suthikulpanit
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 6c063d2c8bf0..989db64a89a7 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, 
struct page *freelist)
  * and full 64 bit address spaces.
  */
 int iommu_map_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long phys_addr,
-  unsigned long page_size,
+  unsigned long iova,
+  unsigned long paddr,
+  unsigned long size,
   int prot,
   gfp_t gfp)
 {
@@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom,
u64 __pte, *pte;
int ret, i, count;
 
-   BUG_ON(!IS_ALIGNED(bus_addr, page_size));
-   BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+   BUG_ON(!IS_ALIGNED(iova, size));
+   BUG_ON(!IS_ALIGNED(paddr, size));
 
ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
goto out;
 
-   count = PAGE_SIZE_PTE_COUNT(page_size);
-   pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+   count = PAGE_SIZE_PTE_COUNT(size);
+   pte   = alloc_pte(dom, iova, size, NULL, gfp, &updated);
 
ret = -ENOMEM;
if (!pte)
@@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom,
updated = true;
 
if (count > 1) {
-   __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
+   __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+   __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom,
 }
 
 unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long page_size)
+  unsigned long iova,
+  unsigned long size)
 {
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
 
-   BUG_ON(!is_power_of_2(page_size));
+   BUG_ON(!is_power_of_2(size));
 
unmapped = 0;
 
-   while (unmapped < page_size) {
-
-   pte = fetch_pte(dom, bus_addr, &unmap_size);
+   while (unmapped < size) {
+   pte = fetch_pte(dom, iova, &unmap_size);
 
if (pte) {
int i, count;
@@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
pte[i] = 0ULL;
}
 
-   bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
+   iova = (iova & ~(unmap_size - 1)) + unmap_size;
unmapped += unmap_size;
}
 
-- 
2.17.1



[PATCH v3 02/14] iommu/amd: Prepare for generic IO page table framework

2020-10-03 Thread Suravee Suthikulpanit
Add initial hook-up code to implement the generic IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/Kconfig   |  1 +
 drivers/iommu/amd/Makefile  |  2 +-
 drivers/iommu/amd/amd_iommu_types.h | 35 +++
 drivers/iommu/amd/io_pgtable.c  | 43 +
 drivers/iommu/amd/iommu.c   | 10 ---
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 ++
 7 files changed, 85 insertions(+), 11 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+   select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
  With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index f696ac7c5f89..e3ac3e57e507 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
 
 #define GA_GUEST_NR0x1
 
+#define IOMMU_IN_ADDR_BIT_SIZE  52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES  ((~0xFFFUL) & ~(2ULL << 38))
+
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
@@ -461,6 +475,26 @@ struct amd_irte_ops;
 
 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
 
+#define io_pgtable_to_data(x) \
+   container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+   io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+   container_of(io_pgtable_ops_to_data(x), \
+struct protection_domain, iop)
+
+#define io_pgtable_cfg_to_data(x) \
+   container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+
+struct amd_io_pgtable {
+   struct io_pgtable_cfg   pgtbl_cfg;
+   struct io_pgtable   iop;
+   int mode;
+   u64 *root;
+};
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -469,6 +503,7 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
   iommu core code */
+   struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
atomic64_t pt_root; /* pgtable root and pgtable mode */
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index ..6b2de9e467d9
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit 
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt)pr_fmt(fmt)
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+/*
+ * ----------------------------------------------------
+ */
+static void v1_free_pgtable(struct io_pgtable *iop)
+{
+}
+
+static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void 
*cookie)
+{
+   struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+
+   return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
+   .alloc  = v1_alloc_pgtable,
+   .free   = v1_free_pgtable,
+};
diff
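
The io_pgtable_ops_to_data()/io_pgtable_ops_to_domain() macros added
above are nested container_of() walks. Below is a self-contained
illustration with shortened stand-in struct names (ops/iop/data/domain
for io_pgtable_ops/io_pgtable/amd_io_pgtable/protection_domain); the
container_of definition is a local userspace equivalent:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ops    { int dummy; };
struct iop    { struct ops ops; };
struct data   { struct iop iop; int mode; };
struct domain { struct data iop; };

int main(void)
{
	struct domain dom = { .iop.mode = 3 };
	struct ops *ops = &dom.iop.iop.ops;

	/* Walk back out: ops -> io_pgtable -> amd_io_pgtable -> domain. */
	struct iop *iop   = container_of(ops, struct iop, ops);
	struct data *d    = container_of(iop, struct data, iop);
	struct domain *pd = container_of(d, struct domain, iop);

	printf("mode=%d, domain recovered: %s\n",
	       d->mode, pd == &dom ? "yes" : "no");
	return 0;
}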

[PATCH v3 08/14] iommu/amd: Remove amd_iommu_domain_get_pgtable

2020-10-03 Thread Suravee Suthikulpanit
Since the IO page table root and mode parameters have been moved into
the struct amd_io_pgtable, the function is no longer needed. Therefore,
remove it along with the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   |  4 ++--
 drivers/iommu/amd/amd_iommu_types.h |  6 -
 drivers/iommu/amd/io_pgtable.c  | 36 ++---
 drivers/iommu/amd/iommu.c   | 34 ---
 4 files changed, 19 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8dff7d85be79..2059e64fdc53 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -101,6 +101,8 @@ static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
atomic64_set(&domain->iop.pt_root, root);
+   domain->iop.root = (u64 *)(root & PAGE_MASK);
+   domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
 static inline
@@ -135,8 +137,6 @@ extern unsigned long iommu_unmap_page(struct 
protection_domain *dom,
 extern u64 *fetch_pte(struct protection_domain *domain,
  unsigned long address,
  unsigned long *page_size);
-extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
-struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 80b5c34357ed..de3fe9433080 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -514,12 +514,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
-/* For decocded pt_root */
-struct domain_pgtable {
-   int mode;
-   u64 *root;
-};
-
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 23e82da2dea8..6c063d2c8bf0 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -184,30 +184,27 @@ static bool increase_address_space(struct 
protection_domain *domain,
   unsigned long address,
   gfp_t gfp)
 {
-   struct domain_pgtable pgtable;
unsigned long flags;
bool ret = true;
u64 *pte;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   if (address <= PM_LEVEL_SIZE(pgtable.mode))
+   if (address <= PM_LEVEL_SIZE(domain->iop.mode))
goto out;
 
ret = false;
-   if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
+   if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
 
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
 
-   *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+   *pte = PM_LEVEL_PDE(domain->iop.mode, 
iommu_virt_to_phys(domain->iop.root));
 
-   pgtable.root  = pte;
-   pgtable.mode += 1;
+   domain->iop.root  = pte;
+   domain->iop.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
amd_iommu_domain_flush_complete(domain);
 
@@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain 
*domain,
 * Device Table needs to be updated and flushed before the new root can
 * be published.
 */
-   amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
+   amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
ret = true;
 
@@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain,
  gfp_t gfp,
  bool *updated)
 {
-   struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
 
BUG_ON(!is_power_of_2(page_size));
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+   while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
/*
 * Return an error if there is no memory to update the
 * page-table.
 */
if (!increase_address_space(domain, address, gfp))
return NULL;
-
-   /* Read new values to check if update was successful */
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
}
 
 
-   level   = pgtable.mode - 1;
-   pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
+   level   = domain-
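
The hunk above rewrites increase_address_space() to operate on
domain->iop directly, but the underlying idea is unchanged: grow the
tree one level by making the old root the sole entry of a freshly
allocated root, and publish the new root only after it is wired up.
A conceptual, non-driver sketch of just that step (all names are
illustrative):

#include <stdlib.h>

struct node {
	struct node *child0;
	int level;
};

static struct node *grow_one_level(struct node *root)
{
	struct node *new_root = calloc(1, sizeof(*new_root));

	if (!new_root)
		return root;		/* keep the old tree on failure */

	new_root->child0 = root;	/* old root becomes the first entry */
	new_root->level  = root->level + 1;

	return new_root;		/* caller publishes, then flushes */
}

int main(void)
{
	struct node *root = calloc(1, sizeof(*root));

	if (root)
		root = grow_one_level(root);	/* 1 level -> 2 levels */
	if (root) {
		free(root->child0);
		free(root);
	}
	return 0;
}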

[PATCH v3 05/14] iommu/amd: Declare functions as extern

2020-10-03 Thread Suravee Suthikulpanit
Also move the declarations to the header file so that they can be
included across multiple files. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  3 +++
 drivers/iommu/amd/iommu.c | 39 +--
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 22ecacb71675..8b7be9171030 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain 
*dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
 extern void amd_iommu_update_and_flush_device_table(struct protection_domain 
*domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 09da37c4c9c4..f91f35edb7ba 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -88,7 +88,6 @@ struct iommu_cmd {
 
 struct kmem_cache *amd_iommu_irq_cache;
 
-static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
 /
@@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain 
*domain,
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
 {
int i;
 
@@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct 
protection_domain *domain,
 
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
 }
@@ -1481,7 +1480,7 @@ static bool increase_address_space(struct 
protection_domain *domain,
pgtable.root  = pte;
pgtable.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/*
 * Device Table needs to be updated and flushed before the new root can
@@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom,
 * Updates and flushing already happened in
 * increase_address_space().
 */
-   domain_flush_tlb_pde(dom);
-   domain_flush_complete(dom);
+   amd_iommu_domain_flush_tlb_pde(dom);
+   amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
 
@@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
 
/* Flush IOTLB */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
/* Wait for the flushes to finish */
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev,
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
 out:
spin_unlock(&dev_data->lock);
@@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct 
protection_domain *domain)
domain_flush_devices(domain);
 }
 
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
 {
struct domain_pgtable pgtable;
 
@@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain 
*domain)
amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
-   domain_flush_tlb_pde(domain);
- 

[PATCH v3 04/14] iommu/amd: Convert to using amd_io_pgtable

2020-10-03 Thread Suravee Suthikulpanit
Make use of the new struct amd_io_pgtable in preparation for removing
the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  1 +
 drivers/iommu/amd/iommu.c | 25 ++---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index da6e09657e00..22ecacb71675 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain 
*dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain 
*domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c8b8619cc744..09da37c4c9c4 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache;
 
 static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
 
 /
  *
@@ -1482,7 +1480,7 @@ static bool increase_address_space(struct 
protection_domain *domain,
 
pgtable.root  = pte;
pgtable.mode += 1;
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
domain_flush_complete(domain);
 
/*
@@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain 
*domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
  bool ats, bool ppr)
 {
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
 
-   if (pgtable->mode != PAGE_MODE_NONE)
-   pte_root = iommu_virt_to_phys(pgtable->root);
+   if (domain->iop.mode != PAGE_MODE_NONE)
+   pte_root = iommu_virt_to_phys(domain->iop.root);
 
-   pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+   pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   set_dte_entry(dev_data->devid, domain, &pgtable,
+   set_dte_entry(dev_data->devid, domain,
  ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
 
@@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct 
iommu_domain *domain,
  *
  */
 
-static void update_device_table(struct protection_domain *domain,
-   struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
 {
struct iommu_dev_data *dev_data;
 
list_for_each_entry(dev_data, &domain->dev_list, list) {
-   set_dte_entry(dev_data->devid, domain, pgtable,
+   set_dte_entry(dev_data->devid, domain,
  dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
 }
 
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
 {
-   update_device_table(domain, pgtable);
+   update_device_table(domain);
domain_flush_devices(domain);
 }
 
@@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain 
*domain)
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
-- 
2.17.1



[PATCH v3 01/14] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline

2020-10-03 Thread Suravee Suthikulpanit
Move the function to the header file to allow inclusion in other files.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h | 13 +
 drivers/iommu/amd/iommu.c | 10 --
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 57309716fd18..97cdb235ce69 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -93,6 +93,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
 }
 
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+   atomic64_set(&domain->pt_root, root);
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+   amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+
 extern bool translation_pre_enabled(struct amd_iommu *iommu);
 extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 struct device *dev);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index db4fb840c59c..e92b3f744292 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -162,16 +162,6 @@ static void amd_iommu_domain_get_pgtable(struct 
protection_domain *domain,
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 
root)
-{
-   atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
-   amd_iommu_domain_set_pt_root(domain, 0);
-}
-
 static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode)
 {
-- 
2.17.1
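
The decode in amd_iommu_domain_get_pgtable() above relies on the root
pointer being page aligned, which leaves the low 12 bits free; the mode
occupies the lowest 3 of them. A worked example of the packing (the
root address is made up):

#include <stdio.h>

int main(void)
{
	unsigned long root = 0xffff888012345000UL;	/* page-aligned root */
	unsigned long mode = 3;				/* e.g. 3-level mode */
	unsigned long pt_root = root | mode;		/* the encode step   */

	printf("root = 0x%lx\n", pt_root & ~0xfffUL);	/* pt_root & PAGE_MASK */
	printf("mode = %lu\n", pt_root & 7);		/* lowest 3 bits       */
	return 0;
}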



[PATCH v3 07/14] iommu/amd: Restructure code for freeing page table

2020-10-03 Thread Suravee Suthikulpanit
Introduce the amd_iommu_free_pgtable helper function, which consolidates
the logic for freeing the page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 12 +++-
 drivers/iommu/amd/iommu.c  | 19 ++-
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index ee7ff4d827e1..8dff7d85be79 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
 /* TODO: These are temporary and will be removed once fully transition */
-extern void free_pagetable(struct domain_pgtable *pgtable);
 extern int iommu_map_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long phys_addr,
@@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct 
protection_domain *domain,
 struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
+extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index c11355afe624..23e82da2dea8 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -136,14 +136,24 @@ static struct page *free_sub_pt(unsigned long root, int 
mode,
return freelist;
 }
 
-void free_pagetable(struct domain_pgtable *pgtable)
+void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable)
 {
+   struct protection_domain *dom;
struct page *freelist = NULL;
unsigned long root;
 
if (pgtable->mode == PAGE_MODE_NONE)
return;
 
+   dom = container_of(pgtable, struct protection_domain, iop);
+
+   /* Update data structure */
+   amd_iommu_domain_clr_pt_root(dom);
+
+   /* Make changes visible to IOMMUs */
+   amd_iommu_domain_update(dom);
+
+   /* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
   pgtable->mode > PAGE_MODE_6_LEVEL);
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4d65f64236b6..cbbea7b952fb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain 
*domain)
 
 static void protection_domain_free(struct protection_domain *domain)
 {
-   struct domain_pgtable pgtable;
-
if (!domain)
return;
 
if (domain->id)
domain_id_free(domain->id);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-   amd_iommu_domain_clr_pt_root(domain);
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
kfree(domain);
 }
@@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
struct protection_domain *domain = to_pdomain(dom);
-   struct domain_pgtable pgtable;
unsigned long flags;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   /* First save pgtable configuration*/
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   /* Remove page-table from domain */
-   amd_iommu_domain_clr_pt_root(domain);
-
-   /* Make changes visible to IOMMUs */
-   amd_iommu_domain_update(domain);
-
-   /* Page-table is not visible to IOMMU anymore, so free it */
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
spin_unlock_irqrestore(&domain->lock, flags);
 }
-- 
2.17.1
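
The comments moved into amd_iommu_free_pgtable() spell out an ordering
requirement worth isolating: the page table must become unreachable from
the hardware before its pages are freed. Below is a compilable skeleton
of just that ordering; every helper name is hypothetical, mapping
loosely to amd_iommu_domain_clr_pt_root(), amd_iommu_domain_update(),
and the free_sub_pt()/free_page_list() pair in the patch:

#include <stdio.h>

struct dom { int id; };

static void clear_root_pointer(struct dom *d) { printf("unpublish %d\n", d->id); }
static void flush_all_iommus(struct dom *d)   { printf("flush %d\n", d->id); }
static void free_tree(struct dom *d)          { printf("free %d\n", d->id); }

static void teardown(struct dom *d)
{
	clear_root_pointer(d);	/* 1. hardware can no longer find the table */
	flush_all_iommus(d);	/* 2. wait until in-flight walks complete   */
	free_tree(d);		/* 3. only now is freeing the pages safe    */
}

int main(void)
{
	struct dom d = { .id = 1 };

	teardown(&d);
	return 0;
}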



[PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support

2020-10-03 Thread Suravee Suthikulpanit
The framework allows a callable implementation of the IO page table.
This allows the AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Change from V2 
(https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t)
  - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data.
  - Patch 13/14: Put back the struct iommu_flush_ops, since patch v2 would run
    into a NULL pointer bug when calling free_io_pgtable_ops if it is not defined.

Change from V1 (https://lkml.org/lkml/2020/9/23/251)
  - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
not currently used. (per Robin)
  - Remove unused struct iommu_flush_ops.  (patch 2/13)
  - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
    (patch 13/13)

Suravee Suthikulpanit (14):
  iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
  iommu/amd: Prepare for generic IO page table framework
  iommu/amd: Move pt_root to struct amd_io_pgtable
  iommu/amd: Convert to using amd_io_pgtable
  iommu/amd: Declare functions as extern
  iommu/amd: Move IO page table related functions
  iommu/amd: Restructure code for freeing page table
  iommu/amd: Remove amd_iommu_domain_get_pgtable
  iommu/amd: Rename variables to be consistent with struct
io_pgtable_ops
  iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
  iommu/amd: Introduce iommu_v1_iova_to_phys
  iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
  iommu/amd: Introduce IOMMU flush callbacks
  iommu/amd: Adopt IO page table framework

 drivers/iommu/amd/Kconfig   |   1 +
 drivers/iommu/amd/Makefile  |   2 +-
 drivers/iommu/amd/amd_iommu.h   |  22 +
 drivers/iommu/amd/amd_iommu_types.h |  43 +-
 drivers/iommu/amd/io_pgtable.c  | 564 
 drivers/iommu/amd/iommu.c   | 646 +++-
 drivers/iommu/io-pgtable.c  |   3 +
 include/linux/io-pgtable.h  |   2 +
 8 files changed, 691 insertions(+), 592 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

-- 
2.17.1



[PATCH v3 03/14] iommu/amd: Move pt_root to struct amd_io_pgtable

2020-10-03 Thread Suravee Suthikulpanit
Move pt_root into struct amd_io_pgtable to better organize the data
structure, since it contains IO page table related information.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   | 2 +-
 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 drivers/iommu/amd/iommu.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 97cdb235ce69..da6e09657e00 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
 static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
-   atomic64_set(&domain->pt_root, root);
+   atomic64_set(&domain->iop.pt_root, root);
 }
 
 static inline
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index e3ac3e57e507..80b5c34357ed 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -493,6 +493,7 @@ struct amd_io_pgtable {
struct io_pgtable   iop;
int mode;
u64 *root;
+   atomic64_t pt_root; /* pgtable root and pgtable mode */
 };
 
 /*
@@ -506,7 +507,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
-   atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx;/* Number of levels for GCR3 table */
u64 *gcr3_tbl;  /* Guest CR3 table */
unsigned long flags;/* flags to find out type of domain */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2b7eb51dcbb8..c8b8619cc744 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct 
iommu_domain *dom)
 static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 struct domain_pgtable *pgtable)
 {
-   u64 pt_root = atomic64_read(&domain->pt_root);
+   u64 pt_root = atomic64_read(&domain->iop.pt_root);
 
pgtable->root = (u64 *)(pt_root & PAGE_MASK);
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-- 
2.17.1



Re: [PATCH] KVM: SVM: Initialize ir_list and ir_list_lock regardless of AVIC enablement

2020-10-03 Thread Suravee Suthikulpanit

Paolo,

On 9/28/20 3:01 PM, Paolo Bonzini wrote:

On 28/09/20 07:53, Suravee Suthikulpanit wrote:

Hi,

Are there any issues or concerns about this patch?


Yes, sorry I haven't replied yet.  Looks like Linus is doing an -rc8 so
there's plenty of time to have it in 5.9.

The thing I'm wondering is, why is svm_update_pi_irte doing anything if
you don't have AVIC enabled?  In other words, this might not be the root
cause of the bug.  You always get to the "else" branch of the loop of
course, and I'm not sure how irq_set_vcpu_affinity returns something
with pi.prev_ga_tag set.


You are right. pi.prev_ga_tag needs to be initialized before use
(in case AVIC is not enabled). I have already sent out another patch
to properly fix the issue instead, with the subject
(KVM: SVM: Initialize prev_ga_tag before use).

Thanks,
Suravee


[PATCH] KVM: SVM: Initialize prev_ga_tag before use

2020-10-03 Thread Suravee Suthikulpanit
The function amd_ir_set_vcpu_affinity makes use of the parameter struct
amd_iommu_pi_data.prev_ga_tag to determine if it should delete struct
amd_iommu_pi_data from a list when not running in AVIC mode.

However, prev_ga_tag is initialized only when AVIC is enabled. The non-zero
uninitialized value can send execution down an unintended code path, which
ends up making use of struct vcpu_svm.ir_list and ir_list_lock without their
having been initialized (since they are intended only for the AVIC case).

This triggers a NULL pointer dereference bug in the function svm_ir_list_del
with the following call trace:

svm_update_pi_irte+0x3c2/0x550 [kvm_amd]
? proc_create_single_data+0x41/0x50
kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm]
__connect+0x5f/0xb0 [irqbypass]
irq_bypass_register_producer+0xf8/0x120 [irqbypass]
vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci]
vfio_msi_set_block+0x77/0xe0 [vfio_pci]
vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci]
vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci]
vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci]
? alloc_file_pseudo+0xa5/0x100
vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
__x64_sys_ioctl+0x96/0xd0
do_syscall_64+0x37/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xa9

Therefore, initialize prev_ga_tag to zero before use. This should be safe
because ga_tag value 0 is invalid (see function avic_vm_init).

Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into 
avic_init_vcpu()")
Signed-off-by: Suravee Suthikulpanit 
---
 arch/x86/kvm/svm/avic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ac830cd50830..381d22daa4ac 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -868,6 +868,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int 
host_irq,
 * - Tell IOMMU to use legacy mode for this interrupt.
 * - Retrieve ga_tag of prior interrupt remapping data.
 */
+   pi.prev_ga_tag = 0;
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
 
-- 
2.17.1



Re: [PATCH v2 00/13] iommu/amd: Add Generic IO Page Table Framework Support

2020-10-03 Thread Suravee Suthikulpanit

I found an issue w/ this series. Please ignore. I'll send out V3.

Regards,
Suravee

On 10/2/20 7:28 PM, Suravee Suthikulpanit wrote:

The framework allows a pluggable (callback-based) implementation of the IO
page table. This allows the AMD IOMMU driver to switch between different
types of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Changes from V1 (https://lkml.org/lkml/2020/9/23/251)
   - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
 not currently used. (per Robin)
   - Remove unused struct iommu_flush_ops.  (patch 2/13)
   - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
 (patch 13/13)

Suravee Suthikulpanit (13):
   iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
   iommu/amd: Prepare for generic IO page table framework
   iommu/amd: Move pt_root to struct amd_io_pgtable
   iommu/amd: Convert to using amd_io_pgtable
   iommu/amd: Declare functions as extern
   iommu/amd: Move IO page table related functions
   iommu/amd: Restructure code for freeing page table
   iommu/amd: Remove amd_iommu_domain_get_pgtable
   iommu/amd: Rename variables to be consistent with struct
 io_pgtable_ops
   iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
   iommu/amd: Introduce iommu_v1_iova_to_phys
   iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
   iommu/amd: Adopt IO page table framework

  drivers/iommu/amd/Kconfig   |   1 +
  drivers/iommu/amd/Makefile  |   2 +-
  drivers/iommu/amd/amd_iommu.h   |  22 +
  drivers/iommu/amd/amd_iommu_types.h |  40 +-
  drivers/iommu/amd/io_pgtable.c  | 534 +++
  drivers/iommu/amd/iommu.c   | 644 +++-
  drivers/iommu/io-pgtable.c  |   3 +
  include/linux/io-pgtable.h  |   2 +
  8 files changed, 656 insertions(+), 592 deletions(-)
  create mode 100644 drivers/iommu/amd/io_pgtable.c



[PATCH v2 08/13] iommu/amd: Remove amd_iommu_domain_get_pgtable

2020-10-02 Thread Suravee Suthikulpanit
Since the IO page table root and mode parameters have been moved into
struct amd_io_pgtable, the function is no longer needed. Therefore,
remove it along with the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   |  4 ++--
 drivers/iommu/amd/amd_iommu_types.h |  6 -
 drivers/iommu/amd/io_pgtable.c  | 36 ++---
 drivers/iommu/amd/iommu.c   | 34 ---
 4 files changed, 19 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8dff7d85be79..2059e64fdc53 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -101,6 +101,8 @@ static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
atomic64_set(&domain->iop.pt_root, root);
+   domain->iop.root = (u64 *)(root & PAGE_MASK);
+   domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
 static inline
@@ -135,8 +137,6 @@ extern unsigned long iommu_unmap_page(struct 
protection_domain *dom,
 extern u64 *fetch_pte(struct protection_domain *domain,
  unsigned long address,
  unsigned long *page_size);
-extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
-struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 5d53b7bec256..a07af389eae1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -511,12 +511,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
-/* For decocded pt_root */
-struct domain_pgtable {
-   int mode;
-   u64 *root;
-};
-
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 0c886419166b..a2acd7e85ec3 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -184,30 +184,27 @@ static bool increase_address_space(struct 
protection_domain *domain,
   unsigned long address,
   gfp_t gfp)
 {
-   struct domain_pgtable pgtable;
unsigned long flags;
bool ret = true;
u64 *pte;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   if (address <= PM_LEVEL_SIZE(pgtable.mode))
+   if (address <= PM_LEVEL_SIZE(domain->iop.mode))
goto out;
 
ret = false;
-   if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
+   if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
 
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
 
-   *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+   *pte = PM_LEVEL_PDE(domain->iop.mode, 
iommu_virt_to_phys(domain->iop.root));
 
-   pgtable.root  = pte;
-   pgtable.mode += 1;
+   domain->iop.root  = pte;
+   domain->iop.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
amd_iommu_domain_flush_complete(domain);
 
@@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain 
*domain,
 * Device Table needs to be updated and flushed before the new root can
 * be published.
 */
-   amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
+   amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
ret = true;
 
@@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain,
  gfp_t gfp,
  bool *updated)
 {
-   struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
 
BUG_ON(!is_power_of_2(page_size));
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+   while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
/*
 * Return an error if there is no memory to update the
 * page-table.
 */
if (!increase_address_space(domain, address, gfp))
return NULL;
-
-   /* Read new values to check if update was successful */
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
}
 
 
-   level   = pgtable.mode - 1;
-   pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
+   level   = domain-

[PATCH v2 06/13] iommu/amd: Move IO page table related functions

2020-10-02 Thread Suravee Suthikulpanit
Prepare to migrate to the IO page table framework.
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  18 ++
 drivers/iommu/amd/io_pgtable.c | 473 
 drivers/iommu/amd/iommu.c  | 476 +
 3 files changed, 493 insertions(+), 474 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8b7be9171030..ee7ff4d827e1 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -122,4 +122,22 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
+/* TODO: These are temporary and will be removed once fully transition */
+extern void free_pagetable(struct domain_pgtable *pgtable);
+extern int iommu_map_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long phys_addr,
+ unsigned long page_size,
+ int prot,
+ gfp_t gfp);
+extern unsigned long iommu_unmap_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long page_size);
+extern u64 *fetch_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long *page_size);
+extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+struct domain_pgtable *pgtable);
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+u64 *root, int mode);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index f123ab6e8a51..7fd3dd9db197 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -23,6 +23,479 @@
 #include "amd_iommu_types.h"
 #include "amd_iommu.h"
 
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+unsigned long *count)
+{
+   unsigned long pte_mask, pg_size, cnt;
+   u64 *fpte;
+
+   pg_size  = PTE_PAGE_SIZE(*pte);
+   cnt  = PAGE_SIZE_PTE_COUNT(pg_size);
+   pte_mask = ~((cnt << 3) - 1);
+   fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+   if (page_size)
+   *page_size = pg_size;
+
+   if (count)
+   *count = cnt;
+
+   return fpte;
+}
+
+/****************************************************************************
+ *
+ * The functions below are used to create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+   while (freelist != NULL) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+   struct page *p = virt_to_page((void *)pt);
+
+   p->freelist = freelist;
+
+   return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN)                                             \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)  \
+{                                                                              \
+       unsigned long p;                                                        \
+       u64 *pt;                                                                \
+       int i;                                                                  \
+                                                                               \
+       pt = (u64 *)__pt;                                                       \
+                                                                               \
+       for (i = 0; i < 512; ++i) {                                             \
+               /* PTE present? */                                              \
+               if (!IOMMU_PTE_PRESENT(pt[i]))                                  \
+                       continue;                                               \
+                                                                               \
+               /* Large PTE? */                                                \
+               if (PM_PTE_LEVEL(pt[i]) == 0 ||                                 \
+                   PM_PTE_LEVEL(pt[i]) == 7)                                   \
+                       continue;                                               \
+                                                                               \
+               p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);

[PATCH v2 07/13] iommu/amd: Restructure code for freeing page table

2020-10-02 Thread Suravee Suthikulpanit
Introduce the amd_iommu_free_pgtable helper function, which consolidates
the logic for freeing a page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 12 +++-
 drivers/iommu/amd/iommu.c  | 19 ++-
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index ee7ff4d827e1..8dff7d85be79 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
 /* TODO: These are temporary and will be removed once fully transition */
-extern void free_pagetable(struct domain_pgtable *pgtable);
 extern int iommu_map_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long phys_addr,
@@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct 
protection_domain *domain,
 struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
+extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 7fd3dd9db197..0c886419166b 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -136,14 +136,24 @@ static struct page *free_sub_pt(unsigned long root, int 
mode,
return freelist;
 }
 
-void free_pagetable(struct domain_pgtable *pgtable)
+void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable)
 {
+   struct protection_domain *dom;
struct page *freelist = NULL;
unsigned long root;
 
if (pgtable->mode == PAGE_MODE_NONE)
return;
 
+   dom = container_of(pgtable, struct protection_domain, iop);
+
+   /* Update data structure */
+   amd_iommu_domain_clr_pt_root(dom);
+
+   /* Make changes visible to IOMMUs */
+   amd_iommu_domain_update(dom);
+
+   /* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
   pgtable->mode > PAGE_MODE_6_LEVEL);
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4d65f64236b6..cbbea7b952fb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain 
*domain)
 
 static void protection_domain_free(struct protection_domain *domain)
 {
-   struct domain_pgtable pgtable;
-
if (!domain)
return;
 
if (domain->id)
domain_id_free(domain->id);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-   amd_iommu_domain_clr_pt_root(domain);
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
kfree(domain);
 }
@@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
struct protection_domain *domain = to_pdomain(dom);
-   struct domain_pgtable pgtable;
unsigned long flags;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   /* First save pgtable configuration*/
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   /* Remove page-table from domain */
-   amd_iommu_domain_clr_pt_root(domain);
-
-   /* Make changes visible to IOMMUs */
-   amd_iommu_domain_update(domain);
-
-   /* Page-table is not visible to IOMMU anymore, so free it */
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
spin_unlock_irqrestore(&domain->lock, flags);
 }
-- 
2.17.1



[PATCH v2 04/13] iommu/amd: Convert to using amd_io_pgtable

2020-10-02 Thread Suravee Suthikulpanit
Make use of the new struct amd_io_pgtable in preparation for removing
struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  1 +
 drivers/iommu/amd/iommu.c | 25 ++---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index da6e09657e00..22ecacb71675 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain 
*dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain 
*domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c8b8619cc744..09da37c4c9c4 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache;
 
 static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
 
 /****************************************************************************
  *
@@ -1482,7 +1480,7 @@ static bool increase_address_space(struct 
protection_domain *domain,
 
pgtable.root  = pte;
pgtable.mode += 1;
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
domain_flush_complete(domain);
 
/*
@@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain 
*domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
  bool ats, bool ppr)
 {
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
 
-   if (pgtable->mode != PAGE_MODE_NONE)
-   pte_root = iommu_virt_to_phys(pgtable->root);
+   if (domain->iop.mode != PAGE_MODE_NONE)
+   pte_root = iommu_virt_to_phys(domain->iop.root);
 
-   pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+   pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   set_dte_entry(dev_data->devid, domain, &pgtable,
+   set_dte_entry(dev_data->devid, domain,
  ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
 
@@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct 
iommu_domain *domain,
  *
  */
 
-static void update_device_table(struct protection_domain *domain,
-   struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
 {
struct iommu_dev_data *dev_data;
 
list_for_each_entry(dev_data, &domain->dev_list, list) {
-   set_dte_entry(dev_data->devid, domain, pgtable,
+   set_dte_entry(dev_data->devid, domain,
  dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
 }
 
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
 {
-   update_device_table(domain, pgtable);
+   update_device_table(domain);
domain_flush_devices(domain);
 }
 
@@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain 
*domain)
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
-- 
2.17.1



[PATCH v2 02/13] iommu/amd: Prepare for generic IO page table framework

2020-10-02 Thread Suravee Suthikulpanit
Add initial hook-up code to implement the generic IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/Kconfig   |  1 +
 drivers/iommu/amd/Makefile  |  2 +-
 drivers/iommu/amd/amd_iommu_types.h | 32 +
 drivers/iommu/amd/io_pgtable.c  | 43 +
 drivers/iommu/amd/iommu.c   | 10 ---
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 ++
 7 files changed, 82 insertions(+), 11 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+   select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
  With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index f696ac7c5f89..77cd8d966fbc 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
 
 #define GA_GUEST_NR0x1
 
+#define IOMMU_IN_ADDR_BIT_SIZE  52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES  ((~0xFFFUL) & ~(2ULL << 38))
+
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
@@ -461,6 +475,23 @@ struct amd_irte_ops;
 
 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
 
+#define io_pgtable_to_data(x) \
+   container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+   io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+   container_of(io_pgtable_ops_to_data(x), \
+struct protection_domain, iop)
+
+struct amd_io_pgtable {
+   struct io_pgtable_cfg   pgtbl_cfg;
+   struct io_pgtable   iop;
+   int mode;
+   u64 *root;
+};
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -469,6 +500,7 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
   iommu core code */
+   struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
atomic64_t pt_root; /* pgtable root and pgtable mode */
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index ..f123ab6e8a51
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit 
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt)pr_fmt(fmt)
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+/*
+ * ----------------------------------------------------------------------------
+ */
+static void v1_free_pgtable(struct io_pgtable *iop)
+{
+}
+
+static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void 
*cookie)
+{
+   struct protection_domain *pdom = (struct protection_domain *)cookie;
+
+   return &pdom->iop.iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
+   .alloc  = v1_alloc_pgtable,
+   .free   = v1_free_pgtable,
+};
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index e92b3f744292..2b7e
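
The AMD_IOMMU_PGSIZES bitmap in the hunk above follows the io-pgtable
convention: each set bit b advertises support for a page size of 2^b bytes.
A standalone sketch (plain C, for illustration only) that decodes the exact
expression used in the patch:

#include <stdio.h>

int main(void)
{
	/* Same expression as the AMD_IOMMU_PGSIZES macro above. */
	unsigned long long pgsizes = (~0xFFFULL) & ~(2ULL << 38);
	int bit;

	for (bit = 12; bit < 52; bit++)
		if (pgsizes & (1ULL << bit))
			printf("2^%d bytes supported\n", bit);

	/* Every power of two from 4K upward is set except 2^39 (512GB),
	 * which is masked out because of the hardware bug noted above. */
	return 0;
}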

[PATCH v2 09/13] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops

2020-10-02 Thread Suravee Suthikulpanit
There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index a2acd7e85ec3..ff1294e8729d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, 
struct page *freelist)
  * and full 64 bit address spaces.
  */
 int iommu_map_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long phys_addr,
-  unsigned long page_size,
+  unsigned long iova,
+  unsigned long paddr,
+  unsigned long size,
   int prot,
   gfp_t gfp)
 {
@@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom,
u64 __pte, *pte;
int ret, i, count;
 
-   BUG_ON(!IS_ALIGNED(bus_addr, page_size));
-   BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+   BUG_ON(!IS_ALIGNED(iova, size));
+   BUG_ON(!IS_ALIGNED(paddr, size));
 
ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
goto out;
 
-   count = PAGE_SIZE_PTE_COUNT(page_size);
-   pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+   count = PAGE_SIZE_PTE_COUNT(size);
+   pte   = alloc_pte(dom, iova, size, NULL, gfp, &updated);
 
ret = -ENOMEM;
if (!pte)
@@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom,
updated = true;
 
if (count > 1) {
-   __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
+   __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+   __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom,
 }
 
 unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long bus_addr,
-  unsigned long page_size)
+  unsigned long iova,
+  unsigned long size)
 {
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
 
-   BUG_ON(!is_power_of_2(page_size));
+   BUG_ON(!is_power_of_2(size));
 
unmapped = 0;
 
-   while (unmapped < page_size) {
-
-   pte = fetch_pte(dom, bus_addr, &unmap_size);
+   while (unmapped < size) {
+   pte = fetch_pte(dom, iova, &unmap_size);
 
if (pte) {
int i, count;
@@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
pte[i] = 0ULL;
}
 
-   bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
+   iova = (iova & ~(unmap_size - 1)) + unmap_size;
unmapped += unmap_size;
}
 
-- 
2.17.1



[PATCH v2 11/13] iommu/amd: Introduce iommu_v1_iova_to_phys

2020-10-02 Thread Suravee Suthikulpanit
This implements iova_to_phys for the AMD IOMMU v1 pagetable,
which will be used by the IO page table framework.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 21 +
 drivers/iommu/amd/iommu.c  | 16 +---
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 1729c303bae5..bbbf18d2514a 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
return unmapped;
 }
 
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned 
long iova)
+{
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+   unsigned long offset_mask, pte_pgsize;
+   u64 *pte, __pte;
+
+   if (pgtable->mode == PAGE_MODE_NONE)
+   return iova;
+
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+   if (!pte || !IOMMU_PTE_PRESENT(*pte))
+   return 0;
+
+   offset_mask = pte_pgsize - 1;
+   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
+
+   return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
 /*
  * ----------------------------------------------------------------------------
  */
@@ -505,6 +525,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct 
io_pgtable_cfg *cfg, void *coo
 {
struct protection_domain *pdom = (struct protection_domain *)cookie;
 
+   pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
return &pdom->iop.iop;
 }
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 87cea1cde414..9a1a16031e00 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct 
iommu_domain *dom,
 {
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
-   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-   unsigned long offset_mask, pte_pgsize;
-   u64 *pte, __pte;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
-   return iova;
-
-   pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
-   if (!pte || !IOMMU_PTE_PRESENT(*pte))
-   return 0;
-
-   offset_mask = pte_pgsize - 1;
-   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
-
-   return (__pte & ~offset_mask) | (iova & offset_mask);
+   return ops->iova_to_phys(ops, iova);
 }
 
 static bool amd_iommu_capable(enum iommu_cap cap)
-- 
2.17.1
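
The arithmetic in iommu_v1_iova_to_phys above can be checked with made-up
numbers: the PTE supplies the page-frame bits and the IOVA supplies the
offset within the mapping. A standalone sketch, assuming a 2 MiB mapping
(all values illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long long pte_phys    = 0x80200000ULL; /* address bits from the PTE */
	unsigned long long iova        = 0x40234567ULL; /* address being translated */
	unsigned long long pte_pgsize  = 2ULL << 20;    /* PTE maps a 2 MiB page */
	unsigned long long offset_mask = pte_pgsize - 1;

	/* Page-frame bits come from the PTE, the offset from the IOVA. */
	unsigned long long phys = (pte_phys & ~offset_mask) | (iova & offset_mask);

	printf("phys = %#llx\n", phys);                 /* prints 0x80234567 */
	return 0;
}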



[PATCH v2 13/13] iommu/amd: Adopt IO page table framework

2020-10-02 Thread Suravee Suthikulpanit
Switch to using the IO page table framework for the AMD IOMMU v1 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/iommu.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 77f44b927ae7..c28949be3442 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1573,6 +1574,20 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
return ret;
 }
 
+struct io_pgtable_ops *
+amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
+  struct protection_domain *domain)
+{
+   domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) {
+   .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
+   .ias= IOMMU_IN_ADDR_BIT_SIZE,
+   .oas= IOMMU_OUT_ADDR_BIT_SIZE,
+   .iommu_dev  = &dev_data->pdev->dev,
+   };
+
+   return alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, 
domain);
+}
+
 /*
  * If a device is not yet associated with a domain, this function makes the
  * device visible in the domain
@@ -1580,6 +1595,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
 static int attach_device(struct device *dev,
 struct protection_domain *domain)
 {
+   struct io_pgtable_ops *pgtbl_ops;
struct iommu_dev_data *dev_data;
struct pci_dev *pdev;
unsigned long flags;
@@ -1623,6 +1639,12 @@ static int attach_device(struct device *dev,
 skip_ats_check:
ret = 0;
 
+   pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain);
+   if (!pgtbl_ops) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
do_attach(dev_data, domain);
 
/*
@@ -1958,6 +1980,8 @@ static void amd_iommu_domain_free(struct iommu_domain 
*dom)
if (domain->dev_cnt > 0)
cleanup_domain(domain);
 
+   free_io_pgtable_ops(&domain->iop.iop.ops);
+
BUG_ON(domain->dev_cnt != 0);
 
if (!dom)
-- 
2.17.1



[PATCH v2 01/13] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline

2020-10-02 Thread Suravee Suthikulpanit
Move the function to the header file to allow inclusion in other files.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h | 13 +
 drivers/iommu/amd/iommu.c | 10 --
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 57309716fd18..97cdb235ce69 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -93,6 +93,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
 }
 
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+   atomic64_set(&domain->pt_root, root);
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+   amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+
 extern bool translation_pre_enabled(struct amd_iommu *iommu);
 extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 struct device *dev);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index db4fb840c59c..e92b3f744292 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -162,16 +162,6 @@ static void amd_iommu_domain_get_pgtable(struct 
protection_domain *domain,
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 
root)
-{
-   atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
-   amd_iommu_domain_set_pt_root(domain, 0);
-}
-
 static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode)
 {
-- 
2.17.1



[PATCH v2 10/13] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable

2020-10-02 Thread Suravee Suthikulpanit
This simplifies the fetch_pte function. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 13 +++--
 drivers/iommu/amd/iommu.c  |  4 +++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2059e64fdc53..69996e57fae2 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom,
 extern unsigned long iommu_unmap_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long page_size);
-extern u64 *fetch_pte(struct protection_domain *domain,
+extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
  unsigned long address,
  unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index ff1294e8729d..1729c303bae5 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct protection_domain *domain,
+u64 *fetch_pte(struct amd_io_pgtable *pgtable,
   unsigned long address,
   unsigned long *page_size)
 {
@@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain,
 
*page_size = 0;
 
-   if (address > PM_LEVEL_SIZE(domain->iop.mode))
+   if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
 
-   level  =  domain->iop.mode - 1;
-   pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+   level  =  pgtable->mode - 1;
+   pte= &pgtable->root[PM_LEVEL_INDEX(level, address)];
*page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
while (level > 0) {
@@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
   unsigned long iova,
   unsigned long size)
 {
+   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
@@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
unmapped = 0;
 
while (unmapped < size) {
-   pte = fetch_pte(dom, iova, &unmap_size);
-
+   pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
int i, count;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3f6ede1e572c..87cea1cde414 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct 
iommu_domain *dom,
  dma_addr_t iova)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
 
if (domain->iop.mode == PAGE_MODE_NONE)
return iova;
 
-   pte = fetch_pte(domain, iova, &pte_pgsize);
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
-- 
2.17.1
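
For reference, a simplified standalone sketch of the top-down walk that
fetch_pte performs: start at the root, index by the bits for the current
level, and descend until a leaf or an absent entry. The macros and the
present-bit layout are placeholders, not the real AMD definitions, and the
large-PTE (level 7) handling is omitted:

#include <stdint.h>

#define LEVEL_SHIFT(l)    (12 + 9 * (l))             /* 9 index bits per level */
#define LEVEL_INDEX(l, a) (((a) >> LEVEL_SHIFT(l)) & 0x1ffULL)
#define PTE_PRESENT(p)    ((p) & 1ULL)               /* placeholder present bit */

static uint64_t *fetch_pte_sketch(uint64_t *root, int mode, uint64_t iova)
{
	int level = mode - 1;
	uint64_t *pte = &root[LEVEL_INDEX(level, iova)];

	while (level > 0) {
		if (!PTE_PRESENT(*pte))
			return NULL;

		/* Descend: follow the next-level table address in the entry. */
		root = (uint64_t *)(uintptr_t)(*pte & ~0xfffULL);
		level -= 1;
		pte = &root[LEVEL_INDEX(level, iova)];
	}
	return pte;
}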



[PATCH v2 12/13] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page

2020-10-02 Thread Suravee Suthikulpanit
These implement map and unmap for the AMD IOMMU v1 pagetable, which
will be used by the IO pagetable framework.

Also clean up unused extern function declarations.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  | 13 -
 drivers/iommu/amd/io_pgtable.c | 25 -
 drivers/iommu/amd/iommu.c  |  7 ---
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 69996e57fae2..2e8dc2a1ec0f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
-/* TODO: These are temporary and will be removed once fully transition */
-extern int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp);
-extern unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size);
-extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index bbbf18d2514a..a5f8d80a9d35 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-  unsigned long address,
-  unsigned long *page_size)
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
 {
int level;
u64 *pte;
@@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, 
struct page *freelist)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-int iommu_map_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long paddr,
-  unsigned long size,
-  int prot,
-  gfp_t gfp)
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
+   struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
struct page *freelist = NULL;
bool updated = false;
u64 __pte, *pte;
@@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom,
return ret;
 }
 
-unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long size)
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
 {
-   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
@@ -525,6 +522,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct 
io_pgtable_cfg *cfg, void *coo
 {
struct protection_domain *pdom = (struct protection_domain *)cookie;
 
+   pdom->iop.iop.ops.map  = iommu_v1_map_page;
+   pdom->iop.iop.ops.unmap= iommu_v1_unmap_page;
pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
return &pdom->iop.iop;
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9a1a16031e00..77f44b927ae7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, 
unsigned long iova,
 gfp_t gfp)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
int ret;
 
@@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, 
unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
 
-   ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
+   ret = op

[PATCH v2 05/13] iommu/amd: Declare functions as extern

2020-10-02 Thread Suravee Suthikulpanit
And move the declarations to a header file so that they can be included
across multiple files. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  3 +++
 drivers/iommu/amd/iommu.c | 39 +--
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 22ecacb71675..8b7be9171030 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain 
*dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
 extern void amd_iommu_update_and_flush_device_table(struct protection_domain 
*domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 09da37c4c9c4..f91f35edb7ba 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -88,7 +88,6 @@ struct iommu_cmd {
 
 struct kmem_cache *amd_iommu_irq_cache;
 
-static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
 /****************************************************************************
@@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain 
*domain,
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
 {
int i;
 
@@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct 
protection_domain *domain,
 
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
 }
@@ -1481,7 +1480,7 @@ static bool increase_address_space(struct 
protection_domain *domain,
pgtable.root  = pte;
pgtable.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/*
 * Device Table needs to be updated and flushed before the new root can
@@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom,
 * Updates and flushing already happened in
 * increase_address_space().
 */
-   domain_flush_tlb_pde(dom);
-   domain_flush_complete(dom);
+   amd_iommu_domain_flush_tlb_pde(dom);
+   amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
 
@@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
 
/* Flush IOTLB */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
/* Wait for the flushes to finish */
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev,
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
 out:
spin_unlock(&dev_data->lock);
@@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct 
protection_domain *domain)
domain_flush_devices(domain);
 }
 
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
 {
struct domain_pgtable pgtable;
 
@@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain 
*domain)
amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
-   domain_flush_tlb_pde(domain);
- 

[PATCH v2 03/13] iommu/amd: Move pt_root to struct amd_io_pgtable

2020-10-02 Thread Suravee Suthikulpanit
Move pt_root into struct amd_io_pgtable to better organize the data
structure, since the struct contains IO page table related information.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   | 2 +-
 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 drivers/iommu/amd/iommu.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 97cdb235ce69..da6e09657e00 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
 static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
-   atomic64_set(&domain->pt_root, root);
+   atomic64_set(&domain->iop.pt_root, root);
 }
 
 static inline
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 77cd8d966fbc..5d53b7bec256 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -490,6 +490,7 @@ struct amd_io_pgtable {
struct io_pgtable   iop;
int mode;
u64 *root;
+   atomic64_t pt_root; /* pgtable root and pgtable mode */
 };
 
 /*
@@ -503,7 +504,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
-   atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx;/* Number of levels for GCR3 table */
u64 *gcr3_tbl;  /* Guest CR3 table */
unsigned long flags;/* flags to find out type of domain */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2b7eb51dcbb8..c8b8619cc744 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct 
iommu_domain *dom)
 static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 struct domain_pgtable *pgtable)
 {
-   u64 pt_root = atomic64_read(&domain->pt_root);
+   u64 pt_root = atomic64_read(&domain->iop.pt_root);
 
pgtable->root = (u64 *)(pt_root & PAGE_MASK);
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-- 
2.17.1



[PATCH v2 00/13] iommu/amd: Add Generic IO Page Table Framework Support

2020-10-02 Thread Suravee Suthikulpanit
The framework allows a pluggable (callback-based) implementation of the IO
page table. This allows the AMD IOMMU driver to switch between different
types of AMD IOMMU page tables (e.g. v1 vs. v2).

This series refactors the current implementation of AMD IOMMU v1 page table
to adopt the framework. There should be no functional change.
Subsequent series will introduce support for the AMD IOMMU v2 page table.

Thanks,
Suravee

Changes from V1 (https://lkml.org/lkml/2020/9/23/251)
  - Do not specify struct io_pgtable_cfg.coherent_walk, since it is
not currently used. (per Robin)
  - Remove unused struct iommu_flush_ops.  (patch 2/13)
  - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c
(patch 13/13)

Suravee Suthikulpanit (13):
  iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
  iommu/amd: Prepare for generic IO page table framework
  iommu/amd: Move pt_root to struct amd_io_pgtable
  iommu/amd: Convert to using amd_io_pgtable
  iommu/amd: Declare functions as extern
  iommu/amd: Move IO page table related functions
  iommu/amd: Restructure code for freeing page table
  iommu/amd: Remove amd_iommu_domain_get_pgtable
  iommu/amd: Rename variables to be consistent with struct
io_pgtable_ops
  iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
  iommu/amd: Introduce iommu_v1_iova_to_phys
  iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
  iommu/amd: Adopt IO page table framework

 drivers/iommu/amd/Kconfig   |   1 +
 drivers/iommu/amd/Makefile  |   2 +-
 drivers/iommu/amd/amd_iommu.h   |  22 +
 drivers/iommu/amd/amd_iommu_types.h |  40 +-
 drivers/iommu/amd/io_pgtable.c  | 534 +++
 drivers/iommu/amd/iommu.c   | 644 +++-
 drivers/iommu/io-pgtable.c  |   3 +
 include/linux/io-pgtable.h  |   2 +
 8 files changed, 656 insertions(+), 592 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

-- 
2.17.1



Re: [PATCH 00/13] iommu: amd: Add Generic IO Page Table Framework Support

2020-10-01 Thread Suravee Suthikulpanit

Joerg,

On 10/1/20 7:59 PM, Joerg Roedel wrote:

On Thu, Sep 24, 2020 at 05:50:37PM +0700, Suravee Suthikulpanit wrote:



On 9/24/20 5:34 PM, Joerg Roedel wrote:

Hi Suravee,

On Wed, Sep 23, 2020 at 10:14:29AM +, Suravee Suthikulpanit wrote:

The framework allows callable implementation of IO page table.
This allows AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).


Is there a reason you created your own framework, there is already an
io-pgtable framework for ARM, maybe that can be reused?



Actually, this is the same framework used by the ARM code.
Sorry if the description is not clear.


Ah, right, thanks. I think this should spend some time in linux-next
before going upstream. Can you please remind me after the next merge
window to pick it up?

Thanks,

Joerg



Sure. Let me send out v2 for this with some more clean up.

Thanks,
Suravee


Re: [PATCH] KVM: SVM: Initialize ir_list and ir_list_lock regardless of AVIC enablement

2020-09-27 Thread Suravee Suthikulpanit

Hi,

Are there any issues or concerns about this patch?

Thank you,
Suravee

On 9/22/20 3:44 PM, Suravee Suthikulpanit wrote:

The struct vcpu_svm.ir_list and ir_list_lock are being accessed even when
AVIC is not enabled, while the current code initializes the list and
the lock only when AVIC is enabled. This ends up triggering a NULL pointer
dereference bug in the function svm_ir_list_del with the following
call trace:

 svm_update_pi_irte+0x3c2/0x550 [kvm_amd]
 ? proc_create_single_data+0x41/0x50
 kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm]
 __connect+0x5f/0xb0 [irqbypass]
 irq_bypass_register_producer+0xf8/0x120 [irqbypass]
 vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci]
 vfio_msi_set_block+0x77/0xe0 [vfio_pci]
 vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci]
 vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci]
 vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci]
 ? alloc_file_pseudo+0xa5/0x100
 vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
 ? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio]
 __x64_sys_ioctl+0x96/0xd0
 do_syscall_64+0x37/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Therefore, move the initialization code before the check for whether AVIC
is enabled, so that it is always executed.

Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into 
avic_init_vcpu()")
Signed-off-by: Suravee Suthikulpanit 
---
  arch/x86/kvm/svm/avic.c | 2 --
  arch/x86/kvm/svm/svm.c  | 3 +++
  2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ac830cd50830..1ccf13783785 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -572,8 +572,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
if (ret)
return ret;
  
-	INIT_LIST_HEAD(&svm->ir_list);
-	spin_lock_init(&svm->ir_list_lock);
svm->dfr_reg = APIC_DFR_FLAT;
  
  	return ret;

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c44f3e9140d5..714d791fe5a5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1225,6 +1225,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x0165;
  
+	INIT_LIST_HEAD(&svm->ir_list);
+	spin_lock_init(&svm->ir_list_lock);
+
return 0;
  
  free_page4:




[PATCH] iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE

2020-09-25 Thread Suravee Suthikulpanit
When using a 128-bit interrupt-remapping table entry (IRTE) (a.k.a. GA mode),
the current driver disables interrupt remapping when it updates the IRTE
so that the upper and lower 64-bit values can be updated safely.

However, this creates a small window where an interrupt could
arrive and result in an IO_PAGE_FAULT (for the interrupt), as shown below.

  IOMMU DriverDevice IRQ
  ===
  irte.RemapEn=0
   ...
   change IRTEIRQ from device ==> IO_PAGE_FAULT !!
   ...
  irte.RemapEn=1

This scenario has been observed when changing irq affinity on a system
running I/O-intensive workload, in which the destination APIC ID
in the IRTE is updated.

Instead, use cmpxchg_double() to update the 128-bit IRTE at once without
disabling interrupt remapping. However, this means several features that
require GA (128-bit IRTE) support will also be affected if cmpxchg16b is
not supported (which would be unprecedented for AMD processors with an IOMMU).
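
For illustration, a minimal userspace sketch of the same idea: replacing a
128-bit entry with one 16-byte compare-exchange, so a concurrent reader (here,
the hardware) can never observe a half-written entry. This assumes a compiler
and platform with 128-bit atomics (e.g. cmpxchg16b via -mcx16 or libatomic);
it is not the kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct entry128 { uint64_t lo, hi; };

static bool update_entry(_Atomic __int128 *slot, struct entry128 e)
{
	__int128 expected = atomic_load(slot);
	__int128 desired  = ((__int128)e.hi << 64) | e.lo;

	/* The driver is the only writer, so a failure here is unexpected. */
	return atomic_compare_exchange_strong(slot, &expected, desired);
}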

Cc: sta...@vger.kernel.org
Fixes: 880ac60e2538 ("iommu/amd: Introduce interrupt remapping ops structure")
Reported-by: Sean Osborne 
Signed-off-by: Suravee Suthikulpanit 
Tested-by: Erik Rockstrom 
Reviewed-by: Joao Martins 
Link: 
https://lore.kernel.org/r/20200903093822.52012-3-suravee.suthikulpa...@amd.com
Signed-off-by: Joerg Roedel 
---
Note: This patch is the back-port on top of the stable branch linux-5.4.y
for the upstream commit e52d58d54a32 ("iommu/amd: Use cmpxchg_double() when
updating 128-bit IRTE") since the original patch does not apply cleanly.

 drivers/iommu/Kconfig  |  2 +-
 drivers/iommu/amd_iommu.c  | 17 +
 drivers/iommu/amd_iommu_init.c | 21 +++--
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 390568afee9f..fc0160e8ed33 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -138,7 +138,7 @@ config AMD_IOMMU
select PCI_PASID
select IOMMU_API
select IOMMU_IOVA
-   depends on X86_64 && PCI && ACPI
+   depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
---help---
  With this option you can enable support for AMD IOMMU hardware in
  your system. An IOMMU is a hardware component which provides
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fa91d856a43e..7b724f7b27a9 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3873,6 +3873,7 @@ static int alloc_irq_index(u16 devid, int count, bool 
align,
 static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
  struct amd_ir_data *data)
 {
+   bool ret;
struct irq_remap_table *table;
struct amd_iommu *iommu;
unsigned long flags;
@@ -3890,10 +3891,18 @@ static int modify_irte_ga(u16 devid, int index, struct 
irte_ga *irte,
 
entry = (struct irte_ga *)table->table;
entry = &entry[index];
-   entry->lo.fields_remap.valid = 0;
-   entry->hi.val = irte->hi.val;
-   entry->lo.val = irte->lo.val;
-   entry->lo.fields_remap.valid = 1;
+
+   ret = cmpxchg_double(&entry->lo.val, &entry->hi.val,
+entry->lo.val, entry->hi.val,
+irte->lo.val, irte->hi.val);
+   /*
+* We use cmpxchg16 to atomically update the 128-bit IRTE,
+* and it cannot be updated by the hardware or other processors
+* behind us, so the return value of cmpxchg16 should be the
+* same as the old value.
+*/
+   WARN_ON(!ret);
+
if (data)
data->ref = entry;
 
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 135ae5222cf3..31d7e2d4f304 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1522,7 +1522,14 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
-   if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
+
+   /*
+* Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
+* GAM also requires GA mode. Therefore, we need to
+* check cmpxchg16b support before enabling it.
+*/
+   if (!boot_cpu_has(X86_FEATURE_CX16) ||
+   ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
case 0x11:
@@ -1531,8 +1538,18 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->mmio_phys_end =

Re: [PATCH 02/13] iommu: amd: Prepare for generic IO page table framework

2020-09-25 Thread Suravee Suthikulpanit

Robin,

On 9/24/20 7:25 PM, Robin Murphy wrote:

+struct io_pgtable_ops *amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
+ struct protection_domain *domain)
+{
+	domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) {
+		.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
+		.ias		= IOMMU_IN_ADDR_BIT_SIZE,
+		.oas		= IOMMU_OUT_ADDR_BIT_SIZE,
+		.coherent_walk	= false,


Is that right? Given that you seem to use regular kernel addresses for pagetable pages and don't have any obvious cache 
maintenance around PTE manipulation, I suspect not ;)

It's fair enough if your implementation doesn't use this and simply assumes
coherency, but in that case it would be less confusing to have the driver set
it to true for the sake of honesty, or just leave it out entirely - explicitly
setting false gives the illusion of being meaningful.


AMD IOMMU can be configured to disable snoop for the page table walk of a particular device (DTE[SD]=1). However, the
current Linux driver does not set this bit, so coherency can be assumed. We can just leave this out for now. I can
remove this when I send out V2 along w/ other changes.


Otherwise, the io-pgtable parts all look OK to me - it's nice to finally 
fulfil the original intent of not being an Arm-specific thing :D


Robin.


Thanks,
Suravee


Re: [PATCH 00/13] iommu: amd: Add Generic IO Page Table Framework Support

2020-09-24 Thread Suravee Suthikulpanit




On 9/24/20 5:34 PM, Joerg Roedel wrote:

Hi Suravee,

On Wed, Sep 23, 2020 at 10:14:29AM +, Suravee Suthikulpanit wrote:

The framework allows callable implementations of the IO page table.
This allows the AMD IOMMU driver to switch between different types
of AMD IOMMU page tables (e.g. v1 vs. v2).


Is there a reason you created your own framework, there is already an
io-pgtable framework for ARM, maybe that can be reused?



Actually, this is the same framework as the one used by the Arm code.
Sorry if the description was not clear.

Suravee



[PATCH v2 1/3] iommu: amd: Use 4K page for completion wait write-back semaphore

2020-09-23 Thread Suravee Suthikulpanit
IOMMU SNP support requires the completion wait write-back semaphore to be
implemented using a 4K-aligned page, where the page address is to be
programmed into the newly introduced MMIO base/range registers.

This new scheme uses a per-iommu atomic variable to store the current
semaphore value, which is incremented for every completion wait command.

Since this new scheme is also compatible with non-SNP mode,
generalize the driver to use a 4K page for the completion-wait semaphore in
both modes.
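
A rough user-space sketch of this sequence-number scheme (fake_iommu and
hw_complete() are made-up stand-ins for the driver state and the hardware's
write-back; the real driver issues a COMPLETION_WAIT command under iommu->lock):

#include <stdint.h>
#include <stdio.h>

#define LOOP_TIMEOUT 100000

struct fake_iommu {
	volatile uint64_t *cmd_sem;	/* the 4K write-back page */
	uint64_t cmd_sem_val;		/* last sequence number handed out */
};

/* Hardware side: the completion-wait command stores 'data' to the page. */
static void hw_complete(struct fake_iommu *iommu, uint64_t data)
{
	*iommu->cmd_sem = data;
}

static int wait_on_sem(struct fake_iommu *iommu, uint64_t data)
{
	int i = 0;

	while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT)
		i++;

	return (i == LOOP_TIMEOUT) ? -1 : 0;
}

int main(void)
{
	static uint64_t page;	/* stands in for get_zeroed_page() */
	struct fake_iommu iommu = { .cmd_sem = &page };

	/* Each completion wait uses a fresh, monotonically increasing value. */
	uint64_t data = ++iommu.cmd_sem_val;

	hw_complete(&iommu, data);	/* pretend the IOMMU wrote it back */
	printf("wait_on_sem: %d\n", wait_on_sem(&iommu, data));
	return 0;
}

Since every waiter polls for its own value, the semaphore never has to be
reset to zero, and the write-back address stays constant - which is what
allows SNP mode to pin it in dedicated MMIO registers.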

Cc: Brijesh Singh 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h |  3 ++-
 drivers/iommu/amd/init.c| 18 ++
 drivers/iommu/amd/iommu.c   | 23 +++
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 30a5d412255a..4c80483e78ec 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -595,7 +595,8 @@ struct amd_iommu {
 #endif
 
u32 flags;
-   volatile u64 __aligned(8) cmd_sem;
+   volatile u64 *cmd_sem;
+   u64 cmd_sem_val;
 
 #ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 445a08d23fed..febc072f2717 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -813,6 +813,19 @@ static int iommu_init_ga(struct amd_iommu *iommu)
return ret;
 }
 
+static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
+{
+   iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL);
+
+   return iommu->cmd_sem ? 0 : -ENOMEM;
+}
+
+static void __init free_cwwb_sem(struct amd_iommu *iommu)
+{
+   if (iommu->cmd_sem)
+   free_page((unsigned long)iommu->cmd_sem);
+}
+
 static void iommu_enable_xt(struct amd_iommu *iommu)
 {
 #ifdef CONFIG_IRQ_REMAP
@@ -1395,6 +1408,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
 
 static void __init free_iommu_one(struct amd_iommu *iommu)
 {
+   free_cwwb_sem(iommu);
free_command_buffer(iommu);
free_event_buffer(iommu);
free_ppr_log(iommu);
@@ -1481,6 +1495,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
int ret;
 
raw_spin_lock_init(&iommu->lock);
+   iommu->cmd_sem_val = 0;
 
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
@@ -1558,6 +1573,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
 
+   if (alloc_cwwb_sem(iommu))
+   return -ENOMEM;
+
if (alloc_command_buffer(iommu))
return -ENOMEM;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 10e4200d3552..9e9898683537 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -792,11 +792,11 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  *
  /
 
-static int wait_on_sem(volatile u64 *sem)
+static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 {
int i = 0;
 
-   while (*sem == 0 && i < LOOP_TIMEOUT) {
+   while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
udelay(1);
i += 1;
}
@@ -827,16 +827,16 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
-static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+static void build_completion_wait(struct iommu_cmd *cmd,
+ struct amd_iommu *iommu,
+ u64 data)
 {
-   u64 paddr = iommu_virt_to_phys((void *)address);
-
-   WARN_ON(address & 0x7ULL);
+   u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
 
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
cmd->data[1] = upper_32_bits(paddr);
-   cmd->data[2] = 1;
+   cmd->data[2] = data;
CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
@@ -1045,22 +1045,21 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
struct iommu_cmd cmd;
unsigned long flags;
int ret;
+   u64 data;
 
if (!iommu->need_sync)
return 0;
 
-
-   build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
-
raw_spin_lock_irqsave(&iommu->lock, flags);
 
-   iommu->cmd_sem = 0;
+   data = ++iommu->cmd_sem_val;
+   build_completion_wait(&cmd, iommu, data);
 
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_unlock;
 
-   ret = wait_on_sem(&iommu->cmd_sem);
+   ret = wait_on_sem(iommu, data);
 
 out_unlock:
raw_spin_unlock_irqrestore(&iommu->lock, flags);
-- 
2.17.1



[PATCH v2 3/3] iommu: amd: Re-purpose Exclusion range registers to support SNP CWWB

2020-09-23 Thread Suravee Suthikulpanit
When the IOMMU SNP support bit is set in the IOMMU Extended Features
register, hardware re-purposes the following registers:

1. IOMMU Exclusion Base register (MMIO offset 0020h) to
   Completion Wait Write-Back (CWWB) Base register

2. IOMMU Exclusion Range Limit (MMIO offset 0028h) to
   Completion Wait Write-Back (CWWB) Range Limit register

and requires the IOMMU CWWB semaphore base and range to be programmed
into the register offsets 0020h and 0028h accordingly.
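
For illustration, a sketch of that register programming with a plain buffer
standing in for the MMIO region (the offsets match the patch below;
PM_ADDR_MASK_SKETCH and the mmio array are assumptions of this sketch, not
the driver's real accessors):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MMIO_EXCL_BASE_OFFSET	0x0020	/* re-purposed: CWWB base  */
#define MMIO_EXCL_LIMIT_OFFSET	0x0028	/* re-purposed: CWWB limit */
#define PM_ADDR_MASK_SKETCH	0x000ffffffffff000ULL

int main(void)
{
	uint8_t mmio[0x100] = { 0 };
	uint64_t sem_phys = 0x12345000ULL;	/* 4K-aligned semaphore page */
	uint64_t entry = sem_phys & PM_ADDR_MASK_SKETCH;

	/* Base == limit selects the minimum 4 Kbyte write-back window. */
	memcpy(&mmio[MMIO_EXCL_BASE_OFFSET], &entry, sizeof(entry));
	memcpy(&mmio[MMIO_EXCL_LIMIT_OFFSET], &entry, sizeof(entry));

	printf("CWWB base/limit programmed to 0x%llx\n",
	       (unsigned long long)entry);
	return 0;
}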

Cc: Brijesh Singh 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h |  1 +
 drivers/iommu/amd/init.c| 26 ++
 2 files changed, 27 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 1e7966c73707..f696ac7c5f89 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -93,6 +93,7 @@
 #define FEATURE_PC (1ULL<<9)
 #define FEATURE_GAM_VAPIC  (1ULL<<21)
 #define FEATURE_EPHSUP (1ULL<<50)
+#define FEATURE_SNP	(1ULL<<63)
 
 #define FEATURE_PASID_SHIFT	32
 #define FEATURE_PASID_MASK	(0x1fULL << FEATURE_PASID_SHIFT)
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index febc072f2717..c55df4347487 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -359,6 +359,29 @@ static void iommu_set_exclusion_range(struct amd_iommu *iommu)
&entry, sizeof(entry));
 }
 
+static void iommu_set_cwwb_range(struct amd_iommu *iommu)
+{
+   u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
+   u64 entry = start & PM_ADDR_MASK;
+
+   if (!iommu_feature(iommu, FEATURE_SNP))
+   return;
+
+   /* Note:
+* Re-purpose Exclusion base/limit registers for Completion wait
+* write-back base/limit.
+*/
+   memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
+   &entry, sizeof(entry));
+
+   /* Note:
+* Default to 4 Kbytes, which can be specified by setting base
+* address equal to the limit address.
+*/
+   memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
+   &entry, sizeof(entry));
+}
+
 /* Programs the physical address of the device table into the IOMMU hardware */
 static void iommu_set_device_table(struct amd_iommu *iommu)
 {
@@ -1901,6 +1924,9 @@ static int __init amd_iommu_init_pci(void)
ret = iommu_init_pci(iommu);
if (ret)
break;
+
+   /* Need to setup range after PCI init */
+   iommu_set_cwwb_range(iommu);
}
 
/*
-- 
2.17.1



[PATCH v2 0/3] amd : iommu : Initial IOMMU support for SNP

2020-09-23 Thread Suravee Suthikulpanit
Introducing support for AMD Secure Nested Paging (SNP) with IOMMU,
which mainly affects the use of IOMMU Exclusion Base and Range Limit
registers. Note that these registers are no longer used by the Linux IOMMU
driver. Patches 2 and 3 are SNP-specific, and discuss details of
the implementation.

In order to support SNP, the current Completion Wait Write-back logic
is modified (patch 1/3). This change is independent of SNP.

Please see the following white paper for more info on SNP:
  
https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf
 

Changes from V1: (https://lkml.org/lkml/2020/9/16/455)
- Patch 2/3: Fix up per Joerg's comments

Thank you,
Suravee

Suravee Suthikulpanit (3):
  iommu: amd: Use 4K page for completion wait write-back semaphore
  iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR
  iommu: amd: Re-purpose Exclusion range registers to support SNP CWWB

 drivers/iommu/amd/amd_iommu_types.h |  6 +-
 drivers/iommu/amd/init.c| 44 ++
 drivers/iommu/amd/iommu.c   | 90 +
 3 files changed, 127 insertions(+), 13 deletions(-)

-- 
2.17.1



[PATCH v2 2/3] iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR

2020-09-23 Thread Suravee Suthikulpanit
IOMMU SNP support introduces two new IOMMU events:
  * RMP Page Fault event
  * RMP Hardware Error event

Hence, add reporting functions for these events.
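
As a stand-alone illustration of the field decoding these reporting functions
perform (shift/mask values follow the patch below; the sample event entry and
the plain printf are made up for this sketch):

#include <stdint.h>
#include <stdio.h>

#define EVENT_DEVID_SHIFT	0
#define EVENT_DEVID_MASK	0xffff
#define EVENT_FLAGS_SHIFT	16
#define EVENT_FLAGS_MASK	0xfff

int main(void)
{
	uint32_t event[4] = { 0x00010020, 0x0000beef, 0x1000, 0x1 };

	int devid     = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
	int flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xff;
	int vmg_tag   = event[1] & 0xffff;
	int flags     = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
	uint64_t gpa  = ((uint64_t)event[3] << 32) | event[2];

	/* devid splits bus:slot.func like PCI_BUS_NUM/PCI_SLOT/PCI_FUNC */
	printf("RMP_PAGE_FAULT device=%02x:%02x.%x vmg_tag=0x%04x gpa=0x%llx flags_rmp=0x%02x flags=0x%03x\n",
	       (devid >> 8) & 0xff, (devid >> 3) & 0x1f, devid & 0x7,
	       vmg_tag, (unsigned long long)gpa, flags_rmp, flags);
	return 0;
}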

Cc: Brijesh Singh 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu_types.h |  2 +
 drivers/iommu/amd/iommu.c   | 67 +
 2 files changed, 69 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 4c80483e78ec..1e7966c73707 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -128,6 +128,8 @@
 #define EVENT_TYPE_IOTLB_INV_TO	0x7
 #define EVENT_TYPE_INV_DEV_REQ	0x8
 #define EVENT_TYPE_INV_PPR_REQ	0x9
+#define EVENT_TYPE_RMP_FAULT	0xd
+#define EVENT_TYPE_RMP_HW_ERR	0xe
 #define EVENT_DEVID_MASK	0xffff
 #define EVENT_DEVID_SHIFT	0
 #define EVENT_DOMID_MASK_LO	0xffff
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9e9898683537..ea64fa8a9418 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -486,6 +486,67 @@ static void dump_command(unsigned long phys_addr)
pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
+{
+   struct iommu_dev_data *dev_data = NULL;
+   int devid, vmg_tag, flags;
+   struct pci_dev *pdev;
+   u64 spa;
+
+   devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+   vmg_tag = (event[1]) & 0xFFFF;
+   flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+   spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);
+
+   pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+  devid & 0xff);
+   if (pdev)
+   dev_data = dev_iommu_priv_get(&pdev->dev);
+
+   if (dev_data && __ratelimit(&dev_data->rs)) {
+   pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, 
spa=0x%llx, flags=0x%04x]\n",
+   vmg_tag, spa, flags);
+   } else {
+   pr_err_ratelimited("Event logged [RMP_HW_ERROR 
device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+   PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+   vmg_tag, spa, flags);
+   }
+
+   if (pdev)
+   pci_dev_put(pdev);
+}
+
+static void amd_iommu_report_rmp_fault(volatile u32 *event)
+{
+   struct iommu_dev_data *dev_data = NULL;
+   int devid, flags_rmp, vmg_tag, flags;
+   struct pci_dev *pdev;
+   u64 gpa;
+
+   devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+   flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF;
+   vmg_tag   = (event[1]) & 0xFFFF;
+   flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+   gpa   = ((u64)event[3] << 32) | event[2];
+
+   pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+  devid & 0xff);
+   if (pdev)
+   dev_data = dev_iommu_priv_get(&pdev->dev);
+
+   if (dev_data && __ratelimit(&dev_data->rs)) {
+   pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, 
gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+   vmg_tag, gpa, flags_rmp, flags);
+   } else {
+   pr_err_ratelimited("Event logged [RMP_PAGE_FAULT 
device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, 
flags=0x%04x]\n",
+   PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+   vmg_tag, gpa, flags_rmp, flags);
+   }
+
+   if (pdev)
+   pci_dev_put(pdev);
+}
+
 static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
u64 address, int flags)
 {
@@ -577,6 +638,12 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
+   case EVENT_TYPE_RMP_FAULT:
+   amd_iommu_report_rmp_fault(event);
+   break;
+   case EVENT_TYPE_RMP_HW_ERR:
+   amd_iommu_report_rmp_hw_error(event);
+   break;
case EVENT_TYPE_INV_PPR_REQ:
pasid = PPR_PASID(*((u64 *)__evt));
tag = event[1] & 0x03FF;
-- 
2.17.1



Re: [PATCH 2/3] iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR

2020-09-23 Thread Suravee Suthikulpanit




On 9/18/20 4:31 PM, Joerg Roedel wrote:

Hi Suravee,

On Wed, Sep 16, 2020 at 01:55:48PM +, Suravee Suthikulpanit wrote:

+static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
+{
+   struct pci_dev *pdev;
+   struct iommu_dev_data *dev_data = NULL;
+   int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+   int vmg_tag   = (event[1]) & 0xFFFF;
+   int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+   u64 spa   = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);


Please write this as:

struct iommu_dev_data *dev_data = NULL;
int devid, vmg_tag, flags;
struct pci_dev *pdev;
u64 spa;

devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
vmg_tag = (event[1]) & 0xFFFF;
flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);

Same applies to the next function.


+
+   pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+  devid & 0xff);
+   if (pdev)
+   dev_data = dev_iommu_priv_get(&pdev->dev);
+
+   if (dev_data && __ratelimit(&dev_data->rs)) {
+   pci_err(pdev, "Event logged [RMP_HW_ERROR devid=0x%04x, 
vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+   devid, vmg_tag, spa, flags);


Printing the devid is not really needed here, no? Same issue in the next
function.


I'll update the patch and will send out V2.

Thanks,
Suravee


[PATCH 08/13] iommu: amd: Remove amd_iommu_domain_get_pgtable

2020-09-23 Thread Suravee Suthikulpanit
Since the IO page table root and mode parameters have been moved into
the struct amd_io_pgtable, the function is no longer needed. Therefore,
remove it along with the struct domain_pgtable.
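
The packing this change relies on - a single 64-bit pt_root carrying both the
page-aligned root pointer and the 3-bit paging mode - can be sketched
stand-alone (PAGE_MASK_SKETCH and pack_pt_root() are illustrative, not kernel
definitions):

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK_SKETCH (~0xfffULL)

static uint64_t pack_pt_root(uint64_t root_phys, int mode)
{
	/* The low 3 bits of a page-aligned address are free for the mode. */
	return (root_phys & PAGE_MASK_SKETCH) | (uint64_t)(mode & 7);
}

int main(void)
{
	uint64_t pt_root = pack_pt_root(0xabcd000, 3);
	uint64_t root = pt_root & PAGE_MASK_SKETCH;
	int mode = pt_root & 7;	/* lowest 3 bits encode pgtable mode */

	printf("root=0x%llx mode=%d\n", (unsigned long long)root, mode);
	return 0;
}

Keeping both fields in one atomic64_t means readers always see a consistent
root/mode pair without taking the domain lock.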

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   |  4 ++--
 drivers/iommu/amd/amd_iommu_types.h |  6 -
 drivers/iommu/amd/io_pgtable.c  | 36 ++---
 drivers/iommu/amd/iommu.c   | 34 ---
 4 files changed, 19 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8dff7d85be79..2059e64fdc53 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -101,6 +101,8 @@ static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
atomic64_set(&domain->iop.pt_root, root);
+   domain->iop.root = (u64 *)(root & PAGE_MASK);
+   domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
 }
 
 static inline
 extern unsigned long iommu_unmap_page(struct protection_domain *dom,
 extern u64 *fetch_pte(struct protection_domain *domain,
  unsigned long address,
  unsigned long *page_size);
-extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
-struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 5d53b7bec256..a07af389eae1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -511,12 +511,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
-/* For decocded pt_root */
-struct domain_pgtable {
-   int mode;
-   u64 *root;
-};
-
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8ce2f0325123..524c5406ccd6 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -215,30 +215,27 @@ static bool increase_address_space(struct protection_domain *domain,
   unsigned long address,
   gfp_t gfp)
 {
-   struct domain_pgtable pgtable;
unsigned long flags;
bool ret = true;
u64 *pte;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   if (address <= PM_LEVEL_SIZE(pgtable.mode))
+   if (address <= PM_LEVEL_SIZE(domain->iop.mode))
goto out;
 
ret = false;
-   if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
+   if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
 
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
 
-   *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+   *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
 
-   pgtable.root  = pte;
-   pgtable.mode += 1;
+   domain->iop.root  = pte;
+   domain->iop.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
amd_iommu_domain_flush_complete(domain);
 
@@ -246,7 +243,7 @@ static bool increase_address_space(struct protection_domain *domain,
 * Device Table needs to be updated and flushed before the new root can
 * be published.
 */
-   amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
+   amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
ret = true;
 
@@ -263,29 +260,23 @@ static u64 *alloc_pte(struct protection_domain *domain,
  gfp_t gfp,
  bool *updated)
 {
-   struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
 
BUG_ON(!is_power_of_2(page_size));
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+   while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
/*
 * Return an error if there is no memory to update the
 * page-table.
 */
if (!increase_address_space(domain, address, gfp))
return NULL;
-
-   /* Read new values to check if update was successful */
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
}
 
 
-   level   = pgtable.mode - 1;
-   pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
+   level   = domain-

[PATCH 13/13] iommu: amd: Adopt IO page table framework

2020-09-23 Thread Suravee Suthikulpanit
Switch to using IO page table framework for AMD IOMMU v1 page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  3 +++
 drivers/iommu/amd/iommu.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2e8dc2a1ec0f..046ea81a3b77 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -127,4 +127,7 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
+extern struct io_pgtable_ops *
+amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
+  struct protection_domain *pdom);
 #endif
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 77f44b927ae7..df304d8a630a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1580,6 +1581,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
 static int attach_device(struct device *dev,
 struct protection_domain *domain)
 {
+   struct io_pgtable_ops *pgtbl_ops;
struct iommu_dev_data *dev_data;
struct pci_dev *pdev;
unsigned long flags;
@@ -1623,6 +1625,12 @@ static int attach_device(struct device *dev,
 skip_ats_check:
ret = 0;
 
+   pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain);
+   if (!pgtbl_ops) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
do_attach(dev_data, domain);
 
/*
@@ -1958,6 +1966,8 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
if (domain->dev_cnt > 0)
cleanup_domain(domain);
 
+   free_io_pgtable_ops(&domain->iop.iop.ops);
+
BUG_ON(domain->dev_cnt != 0);
 
if (!dom)
-- 
2.17.1



[PATCH 12/13] iommu: amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page

2020-09-23 Thread Suravee Suthikulpanit
These implement map and unmap for AMD IOMMU v1 pagetable, which
will be used by the IO pagetable framework.

Also clean up unused extern function declarations.
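
A toy model of the callback wiring this patch performs (toy_pgtable_ops is a
stand-in type; the real io_pgtable_ops and the exact map/unmap signatures come
from the io-pgtable framework):

#include <stddef.h>
#include <stdio.h>

struct toy_pgtable_ops {
	int (*map)(struct toy_pgtable_ops *ops, unsigned long iova,
		   unsigned long paddr, size_t size);
	size_t (*unmap)(struct toy_pgtable_ops *ops, unsigned long iova,
			size_t size);
};

static int v1_map(struct toy_pgtable_ops *ops, unsigned long iova,
		  unsigned long paddr, size_t size)
{
	(void)ops;
	printf("map iova=0x%lx -> paddr=0x%lx size=%zu\n", iova, paddr, size);
	return 0;
}

static size_t v1_unmap(struct toy_pgtable_ops *ops, unsigned long iova,
		       size_t size)
{
	(void)ops;
	printf("unmap iova=0x%lx size=%zu\n", iova, size);
	return size;
}

int main(void)
{
	/* Callers see only the ops table, never the v1 internals. */
	struct toy_pgtable_ops ops = { .map = v1_map, .unmap = v1_unmap };

	ops.map(&ops, 0x1000, 0x2000, 4096);
	ops.unmap(&ops, 0x1000, 4096);
	return 0;
}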

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  | 13 -
 drivers/iommu/amd/io_pgtable.c | 25 -
 drivers/iommu/amd/iommu.c  |  7 ---
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 69996e57fae2..2e8dc2a1ec0f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void);
 static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
-/* TODO: These are temporary and will be removed once fully transition */
-extern int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp);
-extern unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size);
-extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
 extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2f36bab23516..7f7be302c538 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -348,9 +348,9 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-  unsigned long address,
-  unsigned long *page_size)
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
 {
int level;
u64 *pte;
@@ -423,13 +423,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-int iommu_map_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long paddr,
-  unsigned long size,
-  int prot,
-  gfp_t gfp)
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
+   struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
struct page *freelist = NULL;
bool updated = false;
u64 __pte, *pte;
@@ -492,11 +489,11 @@ int iommu_map_page(struct protection_domain *dom,
return ret;
 }
 
-unsigned long iommu_unmap_page(struct protection_domain *dom,
-  unsigned long iova,
-  unsigned long size)
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
 {
-   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
@@ -571,6 +568,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
 {
struct protection_domain *pdom = (struct protection_domain *)cookie;
 
+   pdom->iop.iop.ops.map  = iommu_v1_map_page;
+   pdom->iop.iop.ops.unmap= iommu_v1_unmap_page;
pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
return &pdom->iop.iop;
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9a1a16031e00..77f44b927ae7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 gfp_t gfp)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
int ret;
 
@@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
 
-   ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
+   ret = op

[PATCH 10/13] iommu: amd: Refactor fetch_pte to use struct amd_io_pgtable

2020-09-23 Thread Suravee Suthikulpanit
Pass struct amd_io_pgtable to simplify the fetch_pte function. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 13 +++--
 drivers/iommu/amd/iommu.c  |  4 +++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2059e64fdc53..69996e57fae2 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom,
 extern unsigned long iommu_unmap_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long page_size);
-extern u64 *fetch_pte(struct protection_domain *domain,
+extern u64 *fetch_pte(struct amd_io_pgtable *pgtable,
  unsigned long address,
  unsigned long *page_size);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 5da5ce98b7b3..f913fc7b1e58 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -348,7 +348,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
  * This function checks if there is a PTE for a given dma address. If
  * there is one, it returns the pointer to it.
  */
-u64 *fetch_pte(struct protection_domain *domain,
+u64 *fetch_pte(struct amd_io_pgtable *pgtable,
   unsigned long address,
   unsigned long *page_size)
 {
@@ -357,11 +357,11 @@ u64 *fetch_pte(struct protection_domain *domain,
 
*page_size = 0;
 
-   if (address > PM_LEVEL_SIZE(domain->iop.mode))
+   if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
 
-   level  =  domain->iop.mode - 1;
-   pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+   level  =  pgtable->mode - 1;
+   pte= &pgtable->root[PM_LEVEL_INDEX(level, address)];
*page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
while (level > 0) {
@@ -496,6 +496,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
   unsigned long iova,
   unsigned long size)
 {
+   struct io_pgtable_ops *ops = &dom->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long long unmapped;
unsigned long unmap_size;
u64 *pte;
@@ -505,8 +507,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom,
unmapped = 0;
 
while (unmapped < size) {
-   pte = fetch_pte(dom, iova, &unmap_size);
-
+   pte = fetch_pte(pgtable, iova, &unmap_size);
if (pte) {
int i, count;
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3f6ede1e572c..87cea1cde414 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
  dma_addr_t iova)
 {
struct protection_domain *domain = to_pdomain(dom);
+   struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
 
if (domain->iop.mode == PAGE_MODE_NONE)
return iova;
 
-   pte = fetch_pte(domain, iova, &pte_pgsize);
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
-- 
2.17.1



[PATCH 02/13] iommu: amd: Prepare for generic IO page table framework

2020-09-23 Thread Suravee Suthikulpanit
Add initial hook up code to implement generic IO page table framework.
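
The container_of macros added below let the driver climb from the generic ops
pointer back to its private structures. A stand-alone sketch of that chain,
with the struct layouts trimmed to illustrative stubs:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct io_pgtable { int dummy; };

struct amd_io_pgtable {
	struct io_pgtable iop;
	int mode;
};

struct protection_domain {
	int id;
	struct amd_io_pgtable iop;
};

int main(void)
{
	struct protection_domain pdom = { .id = 42, .iop.mode = 3 };
	struct io_pgtable *iop = &pdom.iop.iop;

	/* Walk back out: io_pgtable -> amd_io_pgtable -> protection_domain */
	struct amd_io_pgtable *data =
		container_of(iop, struct amd_io_pgtable, iop);
	struct protection_domain *dom =
		container_of(data, struct protection_domain, iop);

	printf("mode=%d domain id=%d\n", data->mode, dom->id);
	return 0;
}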

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/Kconfig   |  1 +
 drivers/iommu/amd/Makefile  |  2 +-
 drivers/iommu/amd/amd_iommu_types.h | 32 +++
 drivers/iommu/amd/io_pgtable.c  | 89 +
 drivers/iommu/amd/iommu.c   | 10 
 drivers/iommu/io-pgtable.c  |  3 +
 include/linux/io-pgtable.h  |  2 +
 7 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 drivers/iommu/amd/io_pgtable.c

diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+   select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
  With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index f696ac7c5f89..77cd8d966fbc 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
 
 #define GA_GUEST_NR	0x1
 
+#define IOMMU_IN_ADDR_BIT_SIZE  52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES  ((~0xFFFUL) & ~(2ULL << 38))
+
 /* Bit value definition for dte irq remapping fields*/
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
@@ -461,6 +475,23 @@ struct amd_irte_ops;
 
 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
 
+#define io_pgtable_to_data(x) \
+   container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+   io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+   container_of(io_pgtable_ops_to_data(x), \
+struct protection_domain, iop)
+
+struct amd_io_pgtable {
+   struct io_pgtable_cfg   pgtbl_cfg;
+   struct io_pgtable   iop;
+   int mode;
+   u64 *root;
+};
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -469,6 +500,7 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
   iommu core code */
+   struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
atomic64_t pt_root; /* pgtable root and pgtable mode */
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index ..452cad26a2b3
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit 
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt)	pr_fmt(fmt)
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_flush_leaf(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+unsigned long iova, size_t granule,
+void *cookie)
+{
+   struct protection_domain *pdom = cookie;
+   struct iommu_do

[PATCH 07/13] iommu: amd: Restructure code for freeing page table

2020-09-23 Thread Suravee Suthikulpanit
Introduce amd_iommu_free_pgtable helper function, which consolidates
logic for freeing page table.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h  |  2 +-
 drivers/iommu/amd/io_pgtable.c | 12 +++-
 drivers/iommu/amd/iommu.c  | 19 ++-
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index ee7ff4d827e1..8dff7d85be79 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { }
 #endif
 
 /* TODO: These are temporary and will be removed once fully transition */
-extern void free_pagetable(struct domain_pgtable *pgtable);
 extern int iommu_map_page(struct protection_domain *dom,
  unsigned long bus_addr,
  unsigned long phys_addr,
@@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 struct domain_pgtable *pgtable);
 extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 u64 *root, int mode);
+extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable);
 #endif
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 7e4154e26757..8ce2f0325123 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -167,14 +167,24 @@ static struct page *free_sub_pt(unsigned long root, int mode,
return freelist;
 }
 
-void free_pagetable(struct domain_pgtable *pgtable)
+void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable)
 {
+   struct protection_domain *dom;
struct page *freelist = NULL;
unsigned long root;
 
if (pgtable->mode == PAGE_MODE_NONE)
return;
 
+   dom = container_of(pgtable, struct protection_domain, iop);
+
+   /* Update data structure */
+   amd_iommu_domain_clr_pt_root(dom);
+
+   /* Make changes visible to IOMMUs */
+   amd_iommu_domain_update(dom);
+
+   /* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
   pgtable->mode > PAGE_MODE_6_LEVEL);
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4d65f64236b6..cbbea7b952fb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain *domain)
 
 static void protection_domain_free(struct protection_domain *domain)
 {
-   struct domain_pgtable pgtable;
-
if (!domain)
return;
 
if (domain->id)
domain_id_free(domain->id);
 
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-   amd_iommu_domain_clr_pt_root(domain);
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
kfree(domain);
 }
@@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
struct protection_domain *domain = to_pdomain(dom);
-   struct domain_pgtable pgtable;
unsigned long flags;
 
spin_lock_irqsave(&domain->lock, flags);
 
-   /* First save pgtable configuration*/
-   amd_iommu_domain_get_pgtable(domain, &pgtable);
-
-   /* Remove page-table from domain */
-   amd_iommu_domain_clr_pt_root(domain);
-
-   /* Make changes visible to IOMMUs */
-   amd_iommu_domain_update(domain);
-
-   /* Page-table is not visible to IOMMU anymore, so free it */
-   free_pagetable(&pgtable);
+   amd_iommu_free_pgtable(&domain->iop);
 
spin_unlock_irqrestore(&domain->lock, flags);
 }
-- 
2.17.1



[PATCH 04/13] iommu: amd: Convert to using amd_io_pgtable

2020-09-23 Thread Suravee Suthikulpanit
Make use of the new struct amd_io_pgtable in preparation to remove
the struct domain_pgtable.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  1 +
 drivers/iommu/amd/iommu.c | 25 ++---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index da6e09657e00..22ecacb71675 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain 
*dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c8b8619cc744..09da37c4c9c4 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache;
 
 static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
 
 /
  *
@@ -1482,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain,
 
pgtable.root  = pte;
pgtable.mode += 1;
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
domain_flush_complete(domain);
 
/*
@@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain *domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
  bool ats, bool ppr)
 {
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
 
-   if (pgtable->mode != PAGE_MODE_NONE)
-   pte_root = iommu_virt_to_phys(pgtable->root);
+   if (domain->iop.mode != PAGE_MODE_NONE)
+   pte_root = iommu_virt_to_phys(domain->iop.root);
 
-   pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+   pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   set_dte_entry(dev_data->devid, domain, &pgtable,
+   set_dte_entry(dev_data->devid, domain,
  ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
 
@@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
  *
  */
 
-static void update_device_table(struct protection_domain *domain,
-   struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
 {
struct iommu_dev_data *dev_data;
 
list_for_each_entry(dev_data, &domain->dev_list, list) {
-   set_dte_entry(dev_data->devid, domain, pgtable,
+   set_dte_entry(dev_data->devid, domain,
  dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
 }
 
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
 {
-   update_device_table(domain, pgtable);
+   update_device_table(domain);
domain_flush_devices(domain);
 }
 
@@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain *domain)
 
/* Update device table */
amd_iommu_domain_get_pgtable(domain, &pgtable);
-   update_and_flush_device_table(domain, &pgtable);
+   amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
-- 
2.17.1



[PATCH 03/13] iommu: amd: Move pt_root to struct amd_io_pgtable

2020-09-23 Thread Suravee Suthikulpanit
Move pt_root into struct amd_io_pgtable to better organize the data
structure, since it contains IO page table related information.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h   | 2 +-
 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 drivers/iommu/amd/iommu.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 97cdb235ce69..da6e09657e00 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
 static inline
 void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
 {
-   atomic64_set(&domain->pt_root, root);
+   atomic64_set(&domain->iop.pt_root, root);
 }
 
 static inline
diff --git a/drivers/iommu/amd/amd_iommu_types.h 
b/drivers/iommu/amd/amd_iommu_types.h
index 77cd8d966fbc..5d53b7bec256 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -490,6 +490,7 @@ struct amd_io_pgtable {
struct io_pgtable   iop;
int mode;
u64 *root;
+   atomic64_t pt_root; /* pgtable root and pgtable mode */
 };
 
 /*
@@ -503,7 +504,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock;/* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
-   atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx;/* Number of levels for GCR3 table */
u64 *gcr3_tbl;  /* Guest CR3 table */
unsigned long flags;/* flags to find out type of domain */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2b7eb51dcbb8..c8b8619cc744 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
 struct domain_pgtable *pgtable)
 {
-   u64 pt_root = atomic64_read(&domain->pt_root);
+   u64 pt_root = atomic64_read(&domain->iop.pt_root);
 
pgtable->root = (u64 *)(pt_root & PAGE_MASK);
pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-- 
2.17.1



[PATCH 05/13] iommu: amd: Declare functions as extern

2020-09-23 Thread Suravee Suthikulpanit
Also move the declarations to the header file so that they can be shared
across multiple files. There is no functional change.

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/amd_iommu.h |  3 +++
 drivers/iommu/amd/iommu.c | 39 +--
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 22ecacb71675..8b7be9171030 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain 
*dom, int pasids);
 extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
 extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 unsigned long cr3);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 09da37c4c9c4..f91f35edb7ba 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -88,7 +88,6 @@ struct iommu_cmd {
 
 struct kmem_cache *amd_iommu_irq_cache;
 
-static void update_domain(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
 /
@@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain *domain,
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
 {
int i;
 
@@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
 
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
 }
@@ -1481,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain,
pgtable.root  = pte;
pgtable.mode += 1;
amd_iommu_update_and_flush_device_table(domain);
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/*
 * Device Table needs to be updated and flushed before the new root can
@@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom,
 * Updates and flushing already happened in
 * increase_address_space().
 */
-   domain_flush_tlb_pde(dom);
-   domain_flush_complete(dom);
+   amd_iommu_domain_flush_tlb_pde(dom);
+   amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
 
@@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
 
/* Flush IOTLB */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
/* Wait for the flushes to finish */
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev,
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
-   domain_flush_tlb_pde(domain);
+   amd_iommu_domain_flush_tlb_pde(domain);
 
-   domain_flush_complete(domain);
+   amd_iommu_domain_flush_complete(domain);
 
 out:
spin_unlock(&dev_data->lock);
@@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
domain_flush_devices(domain);
 }
 
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
 {
struct domain_pgtable pgtable;
 
@@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain *domain)
amd_iommu_update_and_flush_device_table(domain);
 
/* Flush domain TLB(s) and wait for completion */
-   domain_flush_tlb_pde(domain);
- 

[PATCH 11/13] iommu: amd: Introduce iommu_v1_iova_to_phys

2020-09-23 Thread Suravee Suthikulpanit
This implements iova_to_phys for AMD IOMMU v1 pagetable,
which will be used by the IO page table framework.
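
The address arithmetic at the heart of this is small enough to show
stand-alone (the values below are made up and the PTE walk itself is elided):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pte_frame  = 0x123400000ULL;	/* frame from the fetched PTE */
	uint64_t pte_pgsize = 0x200000ULL;	/* 2M page, found by the walk */
	uint64_t iova       = 0x40112345ULL;

	uint64_t offset_mask = pte_pgsize - 1;	/* works for any pow2 size */
	uint64_t phys = (pte_frame & ~offset_mask) | (iova & offset_mask);

	printf("iova 0x%llx -> phys 0x%llx\n",
	       (unsigned long long)iova, (unsigned long long)phys);
	return 0;
}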

Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd/io_pgtable.c | 21 +
 drivers/iommu/amd/iommu.c  | 16 +---
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index f913fc7b1e58..2f36bab23516 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -525,6 +525,26 @@ unsigned long iommu_unmap_page(struct protection_domain 
*dom,
return unmapped;
 }
 
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+   unsigned long offset_mask, pte_pgsize;
+   u64 *pte, __pte;
+
+   if (pgtable->mode == PAGE_MODE_NONE)
+   return iova;
+
+   pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+   if (!pte || !IOMMU_PTE_PRESENT(*pte))
+   return 0;
+
+   offset_mask = pte_pgsize - 1;
+   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
+
+   return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
 struct io_pgtable_ops *amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data,
 struct protection_domain *domain)
 {
@@ -551,6 +571,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
 {
struct protection_domain *pdom = (struct protection_domain *)cookie;
 
+   pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
return &pdom->iop.iop;
 }
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 87cea1cde414..9a1a16031e00 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 {
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
-   struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-   unsigned long offset_mask, pte_pgsize;
-   u64 *pte, __pte;
 
-   if (domain->iop.mode == PAGE_MODE_NONE)
-   return iova;
-
-   pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
-   if (!pte || !IOMMU_PTE_PRESENT(*pte))
-   return 0;
-
-   offset_mask = pte_pgsize - 1;
-   __pte   = __sme_clr(*pte & PM_ADDR_MASK);
-
-   return (__pte & ~offset_mask) | (iova & offset_mask);
+   return ops->iova_to_phys(ops, iova);
 }
 
 static bool amd_iommu_capable(enum iommu_cap cap)
-- 
2.17.1


