[PATCH 2/2] iommu/amd: Remove performance counter pre-initialization test
In early AMD desktop/mobile platforms (during 2013), when the IOMMU Performance Counter (PMC) support was first introduced in commit 30861ddc9cca ("perf/x86/amd: Add IOMMU Performance Counter resource management"), there was a HW bug where the counters could not be accessed. The result was reading of the counter always return zero. At the time, the suggested workaround was to add a test logic prior to initializing the PMC feature to check if the counters can be programmed and read back the same value. This has been working fine until the more recent desktop/mobile platforms start enabling power gating for the PMC, which prevents access to the counters. This results in the PMC support being disabled unnecesarily. Unfortunatly, there is no documentation of since which generation of hardware the original PMC HW bug was fixed. Although, it was fixed soon after the first introduction of the PMC. Base on this, we assume that the buggy platforms are less likely to be in used, and it should be relatively safe to remove this legacy logic. Link: https://lore.kernel.org/linux-iommu/alpine.lnx.3.20.13.2006030935570.3...@monopod.intra.ispras.ru/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753 Cc: Tj (Elloe Linux) Cc: Shuah Khan Cc: Alexander Monakov Cc: David Coe Cc: Paul Menzel Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/init.c | 24 +--- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 648cdfd03074..247cdda5d683 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1714,33 +1714,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - static void init_iommu_perf_ctr(struct amd_iommu *iommu) { + u64 val; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; if (!iommu_feature(iommu, FEATURE_PC)) return; amd_iommu_pc_present = true; - /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) - goto pc_false; - - /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) - goto pc_false; - - /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) - goto pc_false; - pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); @@ -1748,11 +1731,6 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) iommu->max_counters = (u8) ((val >> 7) & 0xf); return; - -pc_false: - pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n"); - amd_iommu_pc_present = false; - return; } static ssize_t amd_iommu_show_cap(struct device *dev, -- 2.17.1
[PATCH 1/2] Revert "iommu/amd: Fix performance counter initialization"
From: Paul Menzel This reverts commit 6778ff5b21bd8e78c8bd547fd66437cf2657fd9b. The original commit tries to address an issue, where PMC power-gating causing the IOMMU PMC pre-init test to fail on certain desktop/mobile platforms where the power-gating is normally enabled. There have been several reports that the workaround still does not guarantee to work, and can add up to 100 ms (on the worst case) to the boot process on certain platforms such as the MSI B350M MORTAR with AMD Ryzen 3 2200G. Therefore, revert this commit as a prelude to removing the pre-init test. Link: https://lore.kernel.org/linux-iommu/alpine.lnx.3.20.13.2006030935570.3...@monopod.intra.ispras.ru/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753 Cc: Tj (Elloe Linux) Cc: Shuah Khan Cc: Alexander Monakov Cc: David Coe Signed-off-by: Paul Menzel Signed-off-by: Suravee Suthikulpanit --- Note: I have revised the commit message to add more detail and remove uncessary information. drivers/iommu/amd/init.c | 45 ++-- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 321f5906e6ed..648cdfd03074 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -257,8 +256,6 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -1717,11 +1714,13 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); + +static void init_iommu_perf_ctr(struct amd_iommu *iommu) { - int retry; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg, save_src; + u64 val = 0xabcd, val2 = 0, save_reg = 0; if (!iommu_feature(iommu, FEATURE_PC)) return; @@ -1729,39 +1728,17 @@ static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || - iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) - goto pc_false; - - /* -* Disable power gating by programing the performance counter -* source to 20 (i.e. counts the reads and writes from/to IOMMU -* Reserved Register [MMIO Offset 1FF8h] that are ignored.), -* which never get incremented during this init phase. -* (Note: The event is also deprecated.) -*/ - val = 20; - if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) goto pc_false; /* Check if the performance counters can be written to */ - val = 0xabcd; - for (retry = 5; retry; retry--) { - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || - iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || - val2) - break; - - /* Wait about 20 msec for power gating to disable and retry. */ - msleep(20); - } - - /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || - iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) + if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || + (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || + (val != val2)) goto pc_false; - if (val != val2) + /* restore */ + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) goto pc_false; pci_info(pdev, "IOMMU performance counters supported\n"); -- 2.17.1
[PATCH 0/2] iommu/amd: Revert and remove failing PMC test
This has prevented PMC to work on more recent desktop/mobile platforms, where the PMC power-gating is normally enabled. After consulting with HW designers and IOMMU maintainer, we have decide to remove the legacy test altogether to avoid future PMC enabling issues. Thanks the community for helping to test, investigate, provide data and report issues on several platforms in the field. Regards, Suravee Paul Menzel (1): Revert "iommu/amd: Fix performance counter initialization" Suravee Suthikulpanit (1): iommu/amd: Remove performance counter pre-initialization test drivers/iommu/amd/init.c | 49 ++-- 1 file changed, 2 insertions(+), 47 deletions(-) -- 2.17.1
Re: [RFC PATCH 5/7] iommu/amd: Add support for Guest IO protection
Joerg, On 3/18/21 10:31 PM, Joerg Roedel wrote: On Fri, Mar 12, 2021 at 03:04:09AM -0600, Suravee Suthikulpanit wrote: @@ -519,6 +521,7 @@ struct protection_domain { spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int glx;/* Number of levels for GCR3 table */ + bool giov; /* guest IO protection domain */ Could this be turned into a flag? Good point. I'll convert to use the protection_domain.flags. Thanks, Suravee
Re: [RFC PATCH 6/7] iommu/amd: Introduce amd_iommu_pgtable command-line option
Joerg, On 3/18/21 10:33 PM, Joerg Roedel wrote: On Fri, Mar 12, 2021 at 03:04:10AM -0600, Suravee Suthikulpanit wrote: To allow specification whether to use v1 or v2 IOMMU pagetable for DMA remapping when calling kernel DMA-API. Signed-off-by: Suravee Suthikulpanit --- Documentation/admin-guide/kernel-parameters.txt | 6 ++ drivers/iommu/amd/init.c| 15 +++ 2 files changed, 21 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 04545725f187..466e807369ea 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -319,6 +319,12 @@ This mode requires kvm-amd.avic=1. (Default when IOMMU HW support is present.) + amd_iommu_pgtable= [HW,X86-64] + Specifies one of the following AMD IOMMU page table to + be used for DMA remapping for DMA-API: + v1 - Use v1 page table (Default) + v2 - Use v2 page table Any reason v2 can not be the default when it is supported by the IOMMU? Eventually, we should be able to default to v2. However, we will need to make sure that the v2 implementation will have comparable performance as currently used v1. FYI: I'm also looking into adding support for SVA as well. Thanks, Suravee
[RFC PATCH 7/7] iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API
Introduce init function for setting up DMA domain for DMA-API with the IOMMU v2 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/iommu.c | 21 + 1 file changed, 21 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e29ece6e1e68..bd26de8764bd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1937,6 +1937,24 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) return 0; } +static int protection_domain_init_v2(struct protection_domain *domain) +{ + spin_lock_init(&domain->lock); + domain->id = domain_id_alloc(); + if (!domain->id) + return -ENOMEM; + INIT_LIST_HEAD(&domain->dev_list); + + domain->giov = true; + + if (amd_iommu_pgtable == AMD_IOMMU_V2 && + domain_enable_v2(domain, 1, false)) { + return -ENOMEM; + } + + return 0; +} + static struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; @@ -1964,6 +1982,9 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) case AMD_IOMMU_V1: ret = protection_domain_init_v1(domain, mode); break; + case AMD_IOMMU_V2: + ret = protection_domain_init_v2(domain); + break; default: ret = -EINVAL; } -- 2.17.1
[RFC PATCH 6/7] iommu/amd: Introduce amd_iommu_pgtable command-line option
To allow specification whether to use v1 or v2 IOMMU pagetable for DMA remapping when calling kernel DMA-API. Signed-off-by: Suravee Suthikulpanit --- Documentation/admin-guide/kernel-parameters.txt | 6 ++ drivers/iommu/amd/init.c| 15 +++ 2 files changed, 21 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 04545725f187..466e807369ea 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -319,6 +319,12 @@ This mode requires kvm-amd.avic=1. (Default when IOMMU HW support is present.) + amd_iommu_pgtable= [HW,X86-64] + Specifies one of the following AMD IOMMU page table to + be used for DMA remapping for DMA-API: + v1 - Use v1 page table (Default) + v2 - Use v2 page table + amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9265c1bf1d84..6d5163bfb87e 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3123,6 +3123,20 @@ static int __init parse_amd_iommu_dump(char *str) return 1; } +static int __init parse_amd_iommu_pgtable(char *str) +{ + for (; *str; ++str) { + if (strncmp(str, "v1", 2) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V1; + break; + } else if (strncmp(str, "v2", 2) == 0) { + amd_iommu_pgtable = AMD_IOMMU_V2; + break; + } + } + return 1; +} + static int __init parse_amd_iommu_intr(char *str) { for (; *str; ++str) { @@ -3246,6 +3260,7 @@ static int __init parse_ivrs_acpihid(char *str) __setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); +__setup("amd_iommu_pgtable=", parse_amd_iommu_pgtable); __setup("amd_iommu_intr=", parse_amd_iommu_intr); __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); -- 2.17.1
[RFC PATCH 3/7] iommu/amd: Decouple the logic to enable PPR and GT
Currently, the function to enable iommu v2 (GT) assumes PPR log must also be enabled. This is no longer the case since the IOMMU v2 page table can be enabled without PRR support (for DMA-API use case). Therefore, separate the enabling logic for PPR and GT. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/init.c | 19 +-- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9126efcbaf2c..5def566de6f6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -898,14 +898,6 @@ static void iommu_enable_xt(struct amd_iommu *iommu) #endif /* CONFIG_IRQ_REMAP */ } -static void iommu_enable_gt(struct amd_iommu *iommu) -{ - if (!iommu_feature(iommu, FEATURE_GT)) - return; - - iommu_feature_enable(iommu, CONTROL_GT_EN); -} - /* sets a specific bit in the device table entry. */ static void set_dev_entry_bit(u16 devid, u8 bit) { @@ -1882,6 +1874,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = glxval; else amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + iommu_feature_enable(iommu, CONTROL_GT_EN); } if (iommu_feature(iommu, FEATURE_GT) && @@ -2530,21 +2523,19 @@ static void early_enable_iommus(void) #endif } -static void enable_iommus_v2(void) +static void enable_iommus_ppr(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { + for_each_iommu(iommu) iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); - } } static void enable_iommus(void) { early_enable_iommus(); - enable_iommus_v2(); + enable_iommus_ppr(); } static void disable_iommus(void) @@ -2935,7 +2926,7 @@ static int __init state_next(void) register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; - enable_iommus_v2(); + enable_iommus_ppr(); break; case IOMMU_PCI_INIT: ret = amd_iommu_enable_interrupts(); -- 2.17.1
[RFC PATCH 5/7] iommu/amd: Add support for Guest IO protection
AMD IOMMU introduces support for Guest I/O protection where the request from the I/O device without a PASID are treated as if they have PASID 0. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c| 8 drivers/iommu/amd/iommu.c | 4 3 files changed, 15 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 25062eb86c8b..876ba1adf73e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -93,6 +93,7 @@ #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) #define FEATURE_GAM_VAPIC (1ULL<<21) +#define FEATURE_GIOSUP (1ULL<<48) #define FEATURE_EPHSUP (1ULL<<50) #define FEATURE_SNP(1ULL<<63) @@ -366,6 +367,7 @@ #define DTE_FLAG_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GIOV (1ULL << 54) #define DTE_FLAG_GV(1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) @@ -519,6 +521,7 @@ struct protection_domain { spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int glx;/* Number of levels for GCR3 table */ + bool giov; /* guest IO protection domain */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ unsigned dev_cnt; /* devices assigned to this domain */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 5def566de6f6..9265c1bf1d84 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1895,6 +1895,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); + if (amd_iommu_pgtable == AMD_IOMMU_V2 && + !iommu_feature(iommu, FEATURE_GIOSUP)) { + pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } + if (is_rd890_iommu(iommu->dev)) { int i, j; @@ -1969,6 +1975,8 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } + if (amd_iommu_pgtable == AMD_IOMMU_V2) + pr_info("GIOV enabled\n"); } static int __init amd_iommu_init_pci(void) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f3800efdbb29..e29ece6e1e68 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1405,6 +1405,10 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; + + if (domain->giov && (domain->flags & PD_IOMMUV2_MASK)) + pte_root |= DTE_FLAG_GIOV; + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; flags = amd_iommu_dev_table[devid].data[1]; -- 2.17.1
[RFC PATCH 4/7] iommu/amd: Initial support for AMD IOMMU v2 page table
Introduce IO page table framework support for AMD IOMMU v2 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 + drivers/iommu/amd/io_pgtable_v2.c | 239 drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 2 + 5 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/amd/io_pgtable_v2.c diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index a935f8f4b974..773d8aa00283 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 6937e3674a16..25062eb86c8b 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -265,6 +265,7 @@ * 512GB Pages are not supported due to a hardware bug */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 12) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -503,6 +504,7 @@ struct amd_io_pgtable { int mode; u64 *root; atomic64_t pt_root;/* pgtable root and pgtable mode */ + struct mm_structv2_mm; }; /* diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index ..b0b6ba2d8d35 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt)pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static pte_t *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, + unsigned long *page_size) +{ + int level; + pte_t *ptep; + + ptep = lookup_address_in_mm(&pgtable->v2_mm, iova, &level); + if (!ptep || pte_none(*ptep) || (level == PG_LEVEL_NONE)) + return NULL; + + *page_size = PTE_LEVEL_PAGE_SIZE(level-1); + return ptep; +} + +static pte_t *v2_pte_alloc_map(struct mm_struct *mm, unsigned long vaddr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(mm, vaddr); + p4d = p4d_alloc(mm, pgd, vaddr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, vaddr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, vaddr); + if (!pmd) + return NULL; + pte = pte_alloc_map(mm, pmd, vaddr); + return pte; +} + +static int iommu_v2_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + pte_t *pte; + int ret, i, count; + bool updated = false; + unsigned long o_iova = iova; + unsigned long pte_pgsize; + + BUG_ON(!IS_ALIGNED(iova, size) || !IS_ALIGNED(paddr, size)); + + ret = -EINVAL; + if (!(prot & IOMMU_PROT_MASK)) + goto out; + + count = PAGE_SIZE_PTE_COUNT(size); + + for (i = 0; i < count; ++i, iova += PAGE_SIZE, paddr += PAGE_SIZE) { + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || pte_none(*pte)) { + pte = v2_pte_alloc_map(&dom->iop.v2_mm, iova); + if (!pte) + goto out; + } else { + updated = true; + } + set_pte(pte, __pte((paddr & PAGE_MASK)|_PAGE_PRESENT|_PAGE_USER)); + if (prot & IOMMU_PROT_IW) + *pte = pte_mkwrite(*pte); + } + + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&dom->domain, 0); + else + amd_iommu_flush_page(&dom->domain, 0, o_iova); + }
[RFC PATCH 2/7] iommu/amd: Update sanity check when enable PRI/ATS
Currently, PPR/ATS can be enabled only if the domain is type identity mapping. However, when we allow the IOMMU v2 page table to be used for DMA-API, the sanity check needs to be updated to only apply for the case when using AMD_IOMMU_V1 page table mode. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/iommu.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6f3e42495709..f3800efdbb29 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1549,7 +1549,7 @@ static int pri_reset_while_enabled(struct pci_dev *pdev) return 0; } -static int pdev_iommuv2_enable(struct pci_dev *pdev) +static int pdev_pri_ats_enable(struct pci_dev *pdev) { bool reset_enable; int reqs, ret; @@ -1624,11 +1624,19 @@ static int attach_device(struct device *dev, struct iommu_domain *def_domain = iommu_get_dma_domain(dev); ret = -EINVAL; - if (def_domain->type != IOMMU_DOMAIN_IDENTITY) + + /* +* In case of using AMD_IOMMU_V1 page table mode, and the device +* is enabling for PPR/ATS support (using v2 table), +* we need to make sure that the domain type is identity map. +*/ + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + def_domain->type != IOMMU_DOMAIN_IDENTITY) { goto out; + } if (dev_data->iommu_v2) { - if (pdev_iommuv2_enable(pdev) != 0) + if (pdev_pri_ats_enable(pdev) != 0) goto out; dev_data->ats.enabled = true; -- 2.17.1
[RFC PATCH 1/7] iommu/amd: Refactor amd_iommu_domain_enable_v2
The current function to enable IOMMU v2 also lock the domain. In order to reuse the same code in different code path, in which the domain has already been locked, refactor the function to separate the locking from the enabling logic. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/iommu.c | 42 +-- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a69a8b573e40..6f3e42495709 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -88,6 +88,7 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); +static int domain_enable_v2(struct protection_domain *domain, int pasids, bool has_ppr); / * @@ -2304,10 +2305,9 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) } EXPORT_SYMBOL(amd_iommu_domain_direct_map); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ +static int domain_enable_v2(struct protection_domain *domain, int pasids, bool has_ppr) { - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; int levels, ret; if (pasids <= 0 || pasids > (PASID_MASK + 1)) @@ -2320,17 +2320,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) if (levels > amd_iommu_max_glx_val) return -EINVAL; - spin_lock_irqsave(&domain->lock, flags); - - /* -* Save us all sanity checks whether devices already in the -* domain support IOMMUv2. Just force that the domain has no -* devices attached when it is switched into IOMMUv2 mode. -*/ - ret = -EBUSY; - if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) - goto out; - ret = -ENOMEM; domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); if (domain->gcr3_tbl == NULL) @@ -2344,8 +2333,31 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) ret = 0; out: - spin_unlock_irqrestore(&domain->lock, flags); + return ret; +} +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) +{ + int ret; + unsigned long flags; + struct protection_domain *pdom = to_pdomain(dom); + + spin_lock_irqsave(&pdom->lock, flags); + + /* +* Save us all sanity checks whether devices already in the +* domain support IOMMUv2. Just force that the domain has no +* devices attached when it is switched into IOMMUv2 mode. +*/ + ret = -EBUSY; + if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) + goto out; + + if (pdom->dev_cnt == 0 && !(pdom->gcr3_tbl)) + ret = domain_enable_v2(pdom, pasids, true); + +out: + spin_unlock_irqrestore(&pdom->lock, flags); return ret; } EXPORT_SYMBOL(amd_iommu_domain_enable_v2); -- 2.17.1
[RFC PATCH 0/7] iommu/amd: Add Generic IO Page Table Framework Support for v2 Page Table
This series introduces a new usage model for the v2 page table, where it can be used to implement support for DMA-API by adopting the generic IO page table framework. One of the target usecases is to support nested IO page tables where the guest uses the guest IO page table (v2) for translating GVA to GPA, and the hypervisor uses the host I/O page table (v1) for translating GPA to SPA. This is a pre-requisite for supporting the new HW-assisted vIOMMU presented at the KVM Forum 2020. https://static.sched.com/hosted_files/kvmforum2020/26/vIOMMU%20KVM%20Forum%202020.pdf The following components are introduced in this series: - Part 1 (patch 1-4 and 7) Refactor the current IOMMU page table v2 code to adopt the generic IO page table framework, and add AMD IOMMU Guest (v2) page table management code. - Part 2 (patch 5) Add support for the AMD IOMMU Guest IO Protection feature (GIOV) where requests from the I/O device without a PASID are treated as if they have PASID of 0. - Part 3 (patch 6) Introduce new amd_iommu_pgtable command-line to allow users to select the mode of operation (v1 or v2). See AMD I/O Virtualization Technology Specification for more detail. http://www.amd.com/system/files/TechDocs/48882_IOMMU_3.05_PUB.pdf Thanks, Suravee Suravee Suthikulpanit (7): iommu/amd: Refactor amd_iommu_domain_enable_v2 iommu/amd: Update sanity check when enable PRI/ATS iommu/amd: Decouple the logic to enable PPR and GT iommu/amd: Initial support for AMD IOMMU v2 page table iommu/amd: Add support for Guest IO protection iommu/amd: Introduce amd_iommu_pgtable command-line option iommu/amd: Add support for using AMD IOMMU v2 page table for DMA-API .../admin-guide/kernel-parameters.txt | 6 + drivers/iommu/amd/Makefile| 2 +- drivers/iommu/amd/amd_iommu_types.h | 5 + drivers/iommu/amd/init.c | 42 ++- drivers/iommu/amd/io_pgtable_v2.c | 239 ++ drivers/iommu/amd/iommu.c | 81 -- drivers/iommu/io-pgtable.c| 1 + include/linux/io-pgtable.h| 2 + 8 files changed, 345 insertions(+), 33 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable_v2.c -- 2.17.1
Re: [PATCH] iommu/amd: Fix event counter availability check
This fix has been accepted in the upstream recently. https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/commit/?h=x86/amd Could you please give this a try? Thanks, Suravee On 2/21/21 8:49 PM, Paul Menzel wrote: Dear Suravee, Am 17.09.20 um 19:55 schrieb Alexander Monakov: On Tue, 16 Jun 2020, Suravee Suthikulpanit wrote: Instead of blindly moving the code around to a spot that would just work, I am trying to understand what might be required here. In this case, the init_device_table_dma()should not be needed. I suspect it's the IOMMU invalidate all command that's also needed here. I'm also checking with the HW and BIOS team. Meanwhile, could you please give the following change a try: Hello. Can you give any update please? […] Sorry for late reply. I have a reproducer and working with the HW team to understand the issue. I should be able to provide update with solution by the end of this week. Hello, hope you are doing well. Has this investigation found anything? I am wondering the same. It’d be great to have this fixed in the upstream Linux kernel. Kind regards, Paul
[PATCH] iommu/amd: Fix performance counter initialization
Certain AMD platforms enable power gating feature for IOMMU PMC, which prevents the IOMMU driver from updating the counter while trying to validate the PMC functionality in the init_iommu_perf_ctr(). This results in disabling PMC support and the following error message: "AMD-Vi: Unable to read/write to IOMMU perf counter" To workaround this issue, disable power gating temporarily by programming the counter source to non-zero value while validating the counter, and restore the prior state afterward. Tested-by: Tj (Elloe Linux) Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201753 Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/init.c | 45 ++-- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 83d8ab2aed9f..01da76dc1caa 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -254,6 +255,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -1712,13 +1715,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - -static void init_iommu_perf_ctr(struct amd_iommu *iommu) +static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) { + int retry; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; + u64 val = 0xabcd, val2 = 0, save_reg, save_src; if (!iommu_feature(iommu, FEATURE_PC)) return; @@ -1726,17 +1727,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) goto pc_false; - /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) + /* +* Disable power gating by programing the performance counter +* source to 20 (i.e. counts the reads and writes from/to IOMMU +* Reserved Register [MMIO Offset 1FF8h] that are ignored.), +* which never get incremented during this init phase. +* (Note: The event is also deprecated.) +*/ + val = 20; + if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) goto pc_false; + /* Check if the performance counters can be written to */ + val = 0xabcd; + for (retry = 5; retry; retry--) { + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || + val2) + break; + + /* Wait about 20 msec for power gating to disable and retry. */ + msleep(20); + } + /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) + goto pc_false; + + if (val != val2) goto pc_false; pci_info(pdev, "IOMMU performance counters supported\n"); -- 2.17.1
Re: [PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support
On 1/27/21 7:06 PM, Joerg Roedel wrote: Hi Suravee, On Tue, Dec 15, 2020 at 01:36:52AM -0600, Suravee Suthikulpanit wrote: Suravee Suthikulpanit (13): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table Applied this series, thanks for the work! Given testing goes well you can consider this queued for 5.12. Thanks, Joerg Thanks Joerg and Will, and welcome back!!! Suravee
Re: [PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support
Hi Joerg / Will, Happy New Year!! Just want to follow up on this series. Thanks, Suravee On 12/15/20 2:36 PM, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V3 (https://lore.kernel.org/linux-iommu/20201004014549.16065-1-suravee.suthikulpa...@amd.com/) - Rebase to v5.10 - Patch 2: Add struct iommu_flush_ops (previously in patch 13 of v3) - Patch 7: Consolidate logic into v1_free_pgtable() instead of amd_iommu_free_pgtable() - Patch 12: Check ops->[map|unmap] before calling. - Patch 13: Setup page table when allocating domain (instead of when attaching device). Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2: Introduce helper function io_pgtable_cfg_to_data. - Patch 13: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (13): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/init.c| 2 + drivers/iommu/amd/io_pgtable.c | 564 +++ drivers/iommu/amd/iommu.c | 672 drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 9 files changed, 707 insertions(+), 604 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c
[PATCH v4 12/13] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
These implement map and unmap for AMD IOMMU v1 pagetable, which will be used by the IO pagetable framework. Also clean up unused extern function declarations. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 - drivers/iommu/amd/io_pgtable.c | 25 - drivers/iommu/amd/iommu.c | 13 - 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 83ca822c5349..3770b1a4d51c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -133,19 +133,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -/* TODO: These are temporary and will be removed once fully transition */ -extern int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp); -extern unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size); -extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index a293b69b38b9..d91964e98d58 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size) +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) { int level; u64 *pte; @@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -int iommu_map_page(struct protection_domain *dom, - unsigned long iova, - unsigned long paddr, - unsigned long size, - int prot, - gfp_t gfp) +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); struct page *freelist = NULL; bool updated = false; u64 __pte, *pte; @@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom, return ret; } -unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long iova, - unsigned long size) +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) { - struct io_pgtable_ops *ops = &dom->iop.iop.ops; struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; @@ -554,6 +551,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas= IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb= &v1_flush_ops; + pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.unmap= iommu_v1_unmap_page; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pgtable->iop; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 29b7fefc8485..1f04b251f0c6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2066,8 +2066,9 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; - int ret; + int ret = -EINVAL; if (domain->iop.mode == PAGE_MODE_NONE) return -EINVAL; @@ -2077,9 +2078,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; -
[PATCH v4 11/13] iommu/amd: Introduce iommu_v1_iova_to_phys
This implements iova_to_phys for AMD IOMMU v1 pagetable, which will be used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 22 ++ drivers/iommu/amd/iommu.c | 16 +--- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 87184b6cee0f..a293b69b38b9 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, return unmapped; } +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + /* * */ @@ -534,6 +554,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo cfg->oas= IOMMU_OUT_ADDR_BIT_SIZE, cfg->tlb= &v1_flush_ops; + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + return &pgtable->iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 76f61dd6b89f..29b7fefc8485 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2101,22 +2101,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - unsigned long offset_mask, pte_pgsize; - u64 *pte, __pte; - if (domain->iop.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(pgtable, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) -- 2.17.1
[PATCH v4 04/13] iommu/amd: Convert to using amd_io_pgtable
Make use of the new struct amd_io_pgtable in preparation to remove the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 25 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b8dae3941f0f..bf9723b35e77 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -56,6 +56,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5b93536d6877..fdb6030b505d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -89,8 +89,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); / * @@ -1502,7 +1500,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); domain_flush_complete(domain); /* @@ -1877,17 +1875,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1977,7 +1974,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2284,22 +2281,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * */ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } @@ -2309,7 +2304,7 @@ static void update_domain(struct protection_domain *domain) /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); -- 2.17.1
[PATCH v4 02/13] iommu/amd: Prepare for generic IO page table framework
Add initial hook up code to implement generic IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 35 ++ drivers/iommu/amd/io_pgtable.c | 75 + drivers/iommu/amd/iommu.c | 10 drivers/iommu/io-pgtable.c | 3 ++ include/linux/io-pgtable.h | 2 + 7 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 494b42a31b7a..5d77f34e0fda 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -465,6 +479,26 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ +struct protection_domain, iop) + +#define io_pgtable_cfg_to_data(x) \ + container_of((x), struct amd_io_pgtable, pgtbl_cfg) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -473,6 +507,7 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ atomic64_t pt_root; /* pgtable root and pgtable mode */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index ..aedf2c932c40 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt)pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_flush_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, +unsigned long iova, size_t granule, +
[PATCH v4 10/13] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
To simplify the fetch_pte function. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 13 +++-- drivers/iommu/amd/iommu.c | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 76276d9e463c..83ca822c5349 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -143,7 +143,7 @@ extern int iommu_map_page(struct protection_domain *dom, extern unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long page_size); -extern u64 *fetch_pte(struct protection_domain *domain, +extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 35dd9153e6b7..87184b6cee0f 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct protection_domain *domain, +u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size) { @@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain, *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->iop.mode)) + if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL; - level = domain->iop.mode - 1; - pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte= &pgtable->root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long iova, unsigned long size) { + struct io_pgtable_ops *ops = &dom->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; @@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped = 0; while (unmapped < size) { - pte = fetch_pte(dom, iova, &unmap_size); - + pte = fetch_pte(pgtable, iova, &unmap_size); if (pte) { int i, count; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2963a37b7c16..76f61dd6b89f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2100,13 +2100,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; if (domain->iop.mode == PAGE_MODE_NONE) return iova; - pte = fetch_pte(domain, iova, &pte_pgsize); + pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; -- 2.17.1
[PATCH v4 08/13] iommu/amd: Remove amd_iommu_domain_get_pgtable
Since the IO page table root and mode parameters have been moved into the struct amd_io_pg, the function is no longer needed. Therefore, remove it along with the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/amd_iommu_types.h | 6 - drivers/iommu/amd/io_pgtable.c | 36 ++--- drivers/iommu/amd/iommu.c | 34 --- 4 files changed, 19 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 91d098003f12..76276d9e463c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -110,6 +110,8 @@ static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } static inline @@ -144,8 +146,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom, extern u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size); -extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, -struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7c971c76d685..6897567d307e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -518,12 +518,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index dc674e79ddf0..d4d131e43dcd 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -184,30 +184,27 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { - struct domain_pgtable pgtable; unsigned long flags; bool ret = true; u64 *pte; spin_lock_irqsave(&domain->lock, flags); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) goto out; ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); - pgtable.root = pte; - pgtable.mode += 1; + domain->iop.root = pte; + domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); amd_iommu_domain_flush_complete(domain); @@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain *domain, * Device Table needs to be updated and flushed before the new root can * be published. */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); ret = true; @@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { - struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { /* * Return an error if there is no memory to update the * page-table. */ if (!increase_address_space(domain, address, gfp)) return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); } - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain->iop.mode - 1; + pte = &domain->iop.root[PM_LEVEL_INDE
[PATCH v4 06/13] iommu/amd: Move IO page table related functions
Preparing to migrate to use IO page table framework. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 18 ++ drivers/iommu/amd/io_pgtable.c | 473 drivers/iommu/amd/iommu.c | 476 + 3 files changed, 493 insertions(+), 474 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index bf29ab8c99f0..1bad42a3c73c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -131,4 +131,22 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +/* TODO: These are temporary and will be removed once fully transition */ +extern void free_pagetable(struct domain_pgtable *pgtable); +extern int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + unsigned long page_size, + int prot, + gfp_t gfp); +extern unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size); +extern u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size); +extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, +struct domain_pgtable *pgtable); +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, +u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index aedf2c932c40..345e9bc81fde 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -50,6 +50,479 @@ static const struct iommu_flush_ops v1_flush_ops = { .tlb_add_page = v1_tlb_add_page, }; +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, +unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/ + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + / + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)
[PATCH v4 13/13] iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table
Switch to using IO page table framework for AMD IOMMU v1 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/init.c | 2 ++ drivers/iommu/amd/iommu.c | 48 ++- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 3770b1a4d51c..91452e0ff072 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -36,6 +36,7 @@ extern void amd_iommu_disable(void); extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; +extern enum io_pgtable_fmt amd_iommu_pgtable; /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 23a790f8f550..5fb4bea14cc4 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -147,6 +147,8 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; +enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; + int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 1f04b251f0c6..571e8806e4a1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1901,7 +1902,7 @@ static void protection_domain_free(struct protection_domain *domain) kfree(domain); } -static int protection_domain_init(struct protection_domain *domain, int mode) +static int protection_domain_init_v1(struct protection_domain *domain, int mode) { u64 *pt_root = NULL; @@ -1924,34 +1925,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode) return 0; } -static struct protection_domain *protection_domain_alloc(int mode) +static struct protection_domain *protection_domain_alloc(unsigned int type) { + struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; + int pgtable = amd_iommu_pgtable; + int mode = DEFAULT_PGTABLE_LEVEL; + int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - if (protection_domain_init(domain, mode)) + /* +* Force IOMMU v1 page table when iommu=pt and +* when allocating domain for pass-through devices. +*/ + if (type == IOMMU_DOMAIN_IDENTITY) { + pgtable = AMD_IOMMU_V1; + mode = PAGE_MODE_NONE; + } else if (type == IOMMU_DOMAIN_UNMANAGED) { + pgtable = AMD_IOMMU_V1; + } + + switch (pgtable) { + case AMD_IOMMU_V1: + ret = protection_domain_init_v1(domain, mode); + break; + default: + ret = -EINVAL; + } + + if (ret) goto out_err; - return domain; + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); + if (!pgtbl_ops) + goto out_err; + return domain; out_err: kfree(domain); - return NULL; } static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; - int mode = DEFAULT_PGTABLE_LEVEL; - - if (type == IOMMU_DOMAIN_IDENTITY) - mode = PAGE_MODE_NONE; - domain = protection_domain_alloc(mode); + domain = protection_domain_alloc(type); if (!domain) return NULL; @@ -2070,7 +2092,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, int prot = 0; int ret = -EINVAL; - if (domain->iop.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2093,7 +2116,8 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - if (domain->iop.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return 0; return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; -- 2.17.1
[PATCH v4 07/13] iommu/amd: Restructure code for freeing page table
By consolidate logic into v1_free_pgtable helper function, which is called from IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/io_pgtable.c | 41 -- drivers/iommu/amd/iommu.c | 21 - 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1bad42a3c73c..91d098003f12 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -132,7 +132,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif /* TODO: These are temporary and will be removed once fully transition */ -extern void free_pagetable(struct domain_pgtable *pgtable); extern int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 345e9bc81fde..dc674e79ddf0 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -163,23 +163,6 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -void free_pagetable(struct domain_pgtable *pgtable) -{ - struct page *freelist = NULL; - unsigned long root; - - if (pgtable->mode == PAGE_MODE_NONE) - return; - - BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); - - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); - - free_page_list(freelist); -} - void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { @@ -528,6 +511,30 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, */ static void v1_free_pgtable(struct io_pgtable *iop) { + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct protection_domain *dom; + struct page *freelist = NULL; + unsigned long root; + + if (pgtable->mode == PAGE_MODE_NONE) + return; + + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); + + free_page_list(freelist); } static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e823a457..37ecedce2c14 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1903,17 +1903,14 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); kfree(domain); } @@ -2302,22 +2299,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); spin_unlock_irqrestore(&domain->lock, flags); } -- 2.17.1
[PATCH v4 03/13] iommu/amd: Move pt_root to struct amd_io_pgtable
To better organize the data structure since it contains IO page table related information. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0817bc732d1a..b8dae3941f0f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -105,7 +105,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->pt_root, root); + atomic64_set(&domain->iop.pt_root, root); } static inline diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5d77f34e0fda..7c971c76d685 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -497,6 +497,7 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; + atomic64_t pt_root;/* pgtable root and pgtable mode */ }; /* @@ -510,7 +511,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx;/* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 45d3977d6c00..5b93536d6877 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -145,7 +145,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable) { - u64 pt_root = atomic64_read(&domain->pt_root); + u64 pt_root = atomic64_read(&domain->iop.pt_root); pgtable->root = (u64 *)(pt_root & PAGE_MASK); pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -- 2.17.1
[PATCH v4 05/13] iommu/amd: Declare functions as extern
And move declaration to header file so that they can be included across multiple files. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 39 +-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index bf9723b35e77..bf29ab8c99f0 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -57,6 +57,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index fdb6030b505d..1b10710c91cf 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -87,7 +87,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); / @@ -1314,12 +1313,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1344,7 +1343,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1501,7 +1500,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; amd_iommu_update_and_flush_device_table(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can @@ -1754,8 +1753,8 @@ static int iommu_map_page(struct protection_domain *dom, * Updates and flushing already happened in * increase_address_space(). */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1998,10 +1997,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2134,9 +2133,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2298,7 +2297,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { struct domain_pgtable pgtable; @@ -2307,8 +2306,8 @@ static void update_domain(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); -
[PATCH v4 09/13] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops
There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 31 +++ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index d4d131e43dcd..35dd9153e6b7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * and full 64 bit address spaces. */ int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, + unsigned long iova, + unsigned long paddr, + unsigned long size, int prot, gfp_t gfp) { @@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); ret = -ENOMEM; if (!pte) @@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom, updated = true; if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom, } unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) + unsigned long iova, + unsigned long size) { unsigned long long unmapped; unsigned long unmap_size; u64 *pte; - BUG_ON(!is_power_of_2(page_size)); + BUG_ON(!is_power_of_2(size)); unmapped = 0; - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); + while (unmapped < size) { + pte = fetch_pte(dom, iova, &unmap_size); if (pte) { int i, count; @@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, pte[i] = 0ULL; } - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } -- 2.17.1
[PATCH v4 01/13] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
Move the function to header file to allow inclusion in other files. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 + drivers/iommu/amd/iommu.c | 10 -- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf71714..0817bc732d1a 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -102,6 +102,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->pt_root, root); +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b9cf59443843..7f6b0f60b958 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -161,16 +161,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ } -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { -- 2.17.1
[PATCH v4 00/13] iommu/amd: Add Generic IO Page Table Framework Support
The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V3 (https://lore.kernel.org/linux-iommu/20201004014549.16065-1-suravee.suthikulpa...@amd.com/) - Rebase to v5.10 - Patch 2: Add struct iommu_flush_ops (previously in patch 13 of v3) - Patch 7: Consolidate logic into v1_free_pgtable() instead of amd_iommu_free_pgtable() - Patch 12: Check ops->[map|unmap] before calling. - Patch 13: Setup page table when allocating domain (instead of when attaching device). Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2: Introduce helper function io_pgtable_cfg_to_data. - Patch 13: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (13): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Adopt IO page table framework for AMD IOMMU v1 page table drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/init.c| 2 + drivers/iommu/amd/io_pgtable.c | 564 +++ drivers/iommu/amd/iommu.c | 672 drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 9 files changed, 707 insertions(+), 604 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c -- 2.17.1
[PATCH] iommu/amd: Add sanity check for interrupt remapping table length macros
Currently, macros related to the interrupt remapping table length are defined separately. This has resulted in an oversight in which one of the macros were missed when changing the length. To prevent this, redefine the macros to add built-in sanity check. Also, rename macros to use the name of the DTE[IntTabLen] field as specified in the AMD IOMMU specification. There is no functional change. Suggested-by: Linus Torvalds Reviewed-by: Tom Lendacky Signed-off-by: Suravee Suthikulpanit Cc: Will Deacon Cc: Jerry Snitselaar Cc: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 19 ++- drivers/iommu/amd/init.c| 6 +++--- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 494b42a31b7a..899ce62df3f0 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -255,11 +255,19 @@ /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) -#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) #define DTE_IRQ_REMAP_INTCTL(2ULL << 60) -#define DTE_IRQ_TABLE_LEN (9ULL << 1) #define DTE_IRQ_REMAP_ENABLE1ULL +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define DTE_INTTAB_ALIGNMENT128 +#define DTE_INTTABLEN_VALUE 9ULL +#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1) +#define DTE_INTTABLEN_MASK (0xfULL << 1) +#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE) + #define PAGE_MODE_NONE0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 @@ -409,13 +417,6 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -/* - * AMD IOMMU hardware only support 512 IRTEs despite - * the architectural limitation of 2048 entries. - */ -#define MAX_IRQS_PER_TABLE 512 -#define IRQ_TABLE_ALIGNMENT128 - struct irq_remap_table { raw_spinlock_t lock; unsigned min_index; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 23a790f8f550..6bec8913d064 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -989,10 +989,10 @@ static bool copy_device_table(void) irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; - int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK; + int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; if (irq_v && (int_ctl || int_tab_len)) { if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_IRQ_TABLE_LEN)) { + (int_tab_len != DTE_INTTABLEN)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); return false; } @@ -2674,7 +2674,7 @@ static int __init early_amd_iommu_init(void) remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", remap_cache_sz, - IRQ_TABLE_ALIGNMENT, + DTE_INTTAB_ALIGNMENT, 0, NULL); if (!amd_iommu_irq_cache) goto out; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b9cf59443843..f7abf16d1e3a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3191,7 +3191,7 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) dte &= ~DTE_IRQ_PHYS_ADDR_MASK; dte |= iommu_virt_to_phys(table->table); dte |= DTE_IRQ_REMAP_INTCTL; - dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_INTTABLEN; dte |= DTE_IRQ_REMAP_ENABLE; amd_iommu_dev_table[devid].data[2] = dte; -- 2.17.1
Re: [GIT PULL] IOMMU fix for 5.10 (-final)
Hi All, On 12/10/20 1:50 AM, Will Deacon wrote: On Wed, Dec 09, 2020 at 10:07:46AM -0800, Linus Torvalds wrote: On Wed, Dec 9, 2020 at 6:12 AM Will Deacon wrote: Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix for a fix, where the size of the interrupt remapping table was increased but a related constant for the size of the interrupt table was forgotten. Pulled. Thanks. However, why didn't this then add some sanity checking for the two different #defines to be in sync? IOW, something like #define AMD_IOMMU_IRQ_TABLE_SHIFT 9 #define MAX_IRQS_PER_TABLE (1 << AMD_IOMMU_IRQ_TABLE_SHIFT) #define DTE_IRQ_TABLE_LEN ((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1) or whatever. Hmm? This looks like a worthwhile change to me, but I don't have any hardware so I've been very reluctant to make even "obvious" driver changes here. Suravee -- please can you post a patch implementing the above? I'll send one out ASAP. That way this won't happen again, but perhaps equally importantly the linkage will be more clear, and there won't be those random constants. Naming above is probably garbage - I assume there's some actual architectural name for that irq table length field in the DTE? The one in the spec is even better: "IntTabLen". Will Thanks, Suravee
[PATCH] iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs
According to the AMD IOMMU spec, the commit 73db2fc595f3 ("iommu/amd: Increase interrupt remapping table limit to 512 entries") also requires the interrupt table length (IntTabLen) to be set to 9 (power of 2) in the device table mapping entry (DTE). Fixes: 73db2fc595f3 ("iommu/amd: Increase interrupt remapping table limit to 512 entries") Reported-by: Jerry Snitselaar Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 89647700bab2..494b42a31b7a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -257,7 +257,7 @@ #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) #define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) #define DTE_IRQ_REMAP_INTCTL(2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_TABLE_LEN (9ULL << 1) #define DTE_IRQ_REMAP_ENABLE1ULL #define PAGE_MODE_NONE0x00 -- 2.17.1
Re: [PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries
Jerry, On 12/2/20 6:53 AM, Jerry Snitselaar wrote: Suravee Suthikulpanit @ 2020-10-14 19:50 MST: Certain device drivers allocate IO queues on a per-cpu basis. On AMD EPYC platform, which can support up-to 256 cpu threads, this can exceed the current MAX_IRQ_PER_TABLE limit of 256, and result in the error message: AMD-Vi: Failed to allocate IRTE This has been observed with certain NVME devices. AMD IOMMU hardware can actually support upto 512 interrupt remapping table entries. Therefore, update the driver to match the hardware limit. Please note that this also increases the size of interrupt remapping table to 8KB per device when using the 128-bit IRTE format. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30a5d412255a..427484c45589 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT 128 struct irq_remap_table { With this change should DTE_IRQ_TABLE_LEN be changed to 9? IIUC the spec correctly leaving it at 8 is saying the table is 256 entries long. You are correct. Sorry I missed this part. I'll send the fix-up patch ASAP. Thank you, Suravee
Re: [PATCH v2] iommu/amd: Enforce 4k mapping for certain IOMMU data structures
Will, To answer your questions from v1 thread. On 11/18/20 5:57 AM, Will Deacon wrote: > On 11/5/20 9:58 PM, Suravee Suthikulpanit wrote: >> AMD IOMMU requires 4k-aligned pages for the event log, the PPR log, >> and the completion wait write-back regions. However, when allocating >> the pages, they could be part of large mapping (e.g. 2M) page. >> This causes #PF due to the SNP RMP hardware enforces the check based >> on the page level for these data structures. > > Please could you include an example backtrace here? Unfortunately, we don't actually have the backtrace available here. This information is based on the SEV-SNP specification. >> So, fix by calling set_memory_4k() on the allocated pages. > > I think I'm missing something here. set_memory_4k() will break the kernel > linear mapping up into page granular mappings, but the IOMMU isn't using > that mapping, right? That's correct. This does not affect the IOMMU, but it affects the PSP FW. > It's just using the physical address returned by iommu_virt_to_phys(), so why does it matter? > > Just be nice to capture some of this rationale in the log, especially as > I'm not familiar with this device. According to the AMD SEV-SNP white paper (https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf), the Reverse Map Table (RMP) contains one entry for every 4K page of DRAM that may be used by the VM. In this case, the pages allocated by the IOMMU driver are added as 4K entries in the RMP table by the SEV-SNP FW. During the page table walk, the RMP checks if the page is owned by the hypervisor. Without calling set_memory_4k() to break the mapping up into 4K pages, pages could end up being part of large mapping (e.g. 2M page), in which the page access would be denied and result in #PF. >> Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore") > > I couldn't figure out how that commit could cause this problem. Please can > you explain that to me? Hope this helps clarify. If so, I'll update the commit log and send out V3. Thanks, Suravee
Re: [PATCH] iommu/amd: Enforce 4k mapping for certain IOMMU data structures
Will, I have already submitted v2 of this patch. Let me move the discussion there instead ... (https://lore.kernel.org/linux-iommu/20201105145832.3065-1-suravee.suthikulpa...@amd.com/) Suravee On 11/18/20 5:57 AM, Will Deacon wrote: On Wed, Oct 28, 2020 at 11:18:24PM +, Suravee Suthikulpanit wrote: AMD IOMMU requires 4k-aligned pages for the event log, the PPR log, and the completion wait write-back regions. However, when allocating the pages, they could be part of large mapping (e.g. 2M) page. This causes #PF due to the SNP RMP hardware enforces the check based on the page level for these data structures. Please could you include an example backtrace here? So, fix by calling set_memory_4k() on the allocated pages. I think I'm missing something here. set_memory_4k() will break the kernel linear mapping up into page granular mappings, but the IOMMU isn't using that mapping, right? It's just using the physical address returned by iommu_virt_to_phys(), so why does it matter? Just be nice to capture some of this rationale in the log, especially as I'm not familiar with this device. Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore") I couldn't figure out how that commit could cause this problem. Please can you explain that to me? Cheers, Will
Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers
Tglx, On 11/18/20 9:06 PM, Thomas Gleixner wrote: Suravee, On Wed, Nov 18 2020 at 17:29, Suravee Suthikulpanit wrote: On 11/17/20 9:00 AM, Suravee Suthikulpanit wrote: I might need your help debugging this issue. I'm seeing the following error: [ 14.005937] irq 29, desc: d200500b, depth: 0, count: 0, unhandled: 0 [ 14.006234] ->handle_irq(): eab4b6eb, handle_bad_irq+0x0/0x230 [ 14.006234] ->irq_data.chip(): 1cce6d6b, intcapxt_controller+0x0/0x120 [ 14.006234] ->action(): 83bfd734 [ 14.006234] ->action->handler(): 94806345, amd_iommu_int_handler+0x0/0x10 [ 14.006234] unexpected IRQ trap at vector 1d Do you have any idea what might have gone wrong here? Yes. This lacks setting up the low level flow handler. Delta patch below. Thanks, tglx --- --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2033,6 +2033,7 @@ static int intcapxt_irqdomain_alloc(stru irqd->chip = &intcapxt_controller; irqd->chip_data = info->data; + __irq_set_handler(i, handle_edge_irq, 0, "edge"); } return ret; Yes, this fixes the issue. Now I can receive the IOMMU event log interrupts for IO_PAGE_FAULT event, which is triggered using the injection interface via debugfs. Thanks, Suravee
Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers
David On 11/17/20 9:00 AM, Suravee Suthikulpanit wrote: David, On 11/13/20 10:14 PM, David Woodhouse wrote: On Wed, 2020-11-11 at 14:30 -0600, Tom Lendacky wrote: I had trouble cloning your tree for some reason, so just took the top three patches and applied them to the tip tree. This all appears to be working. I'll let the IOMMU experts take a closer look (adding Suravee). Thanks. I see Thomas has taken the first two into the tip.git x86/apic branch already, so we're just looking for an ack on the third. Which is this one... From 49ee4fa51b8c06d14b7c4c74d15a7d76f865a8ea Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Nov 2020 12:09:01 + Subject: [PATCH] iommu/amd: Fix IOMMU interrupt generation in X2APIC mode The AMD IOMMU has two modes for generating its own interrupts. The first is very much based on PCI MSI, and can be configured by Linux precisely that way. But like legacy unmapped PCI MSI it's limited to 8 bits of APIC ID. The second method does not use PCI MSI at all in hardawre, and instead configures the INTCAPXT registers in the IOMMU directly with the APIC ID and vector. In the latter case, the IOMMU driver would still use pci_enable_msi(), read back (through MMIO) the MSI message that Linux wrote to the PCI MSI table, then swizzle those bits into the appropriate register. Historically, this worked because__irq_compose_msi_msg() would silently generate an invalid MSI message with the high bits of the APIC ID in the high bits of the MSI address. That hack was intended only for the Intel IOMMU, and I recently enforced that, introducing a warning in __irq_msi_compose_msg() if it was invoked with an APIC ID above 255. Fix the AMD IOMMU not to depend on that hack any more, by having its own irqdomain and directly putting the bits from the irq_cfg into the right place in its ->activate() method. Fixes: 47bea873cf80 "x86/msi: Only use high bits of MSI address for DMAR unit") Signed-off-by: David Woodhouse I'm still working on testing this series using IO_PAGE_FAULT injection to trigger the IOMMU interrupts. I am still debugging some issues, and I'll keep you updated on the findings. Thanks, Suravee I might need your help debugging this issue. I'm seeing the following error: [ 14.005937] irq 29, desc: d200500b, depth: 0, count: 0, unhandled: 0 [ 14.006234] ->handle_irq(): eab4b6eb, handle_bad_irq+0x0/0x230 [ 14.006234] ->irq_data.chip(): 1cce6d6b, intcapxt_controller+0x0/0x120 [ 14.006234] ->action(): 83bfd734 [ 14.006234] ->action->handler(): 94806345, amd_iommu_int_handler+0x0/0x10 [ 14.006234] unexpected IRQ trap at vector 1d Do you have any idea what might have gone wrong here? Thanks, Suravee
Re: [EXTERNAL] [tip: x86/apic] x86/io_apic: Cleanup trigger/polarity helpers
David, On 11/13/20 10:14 PM, David Woodhouse wrote: On Wed, 2020-11-11 at 14:30 -0600, Tom Lendacky wrote: I had trouble cloning your tree for some reason, so just took the top three patches and applied them to the tip tree. This all appears to be working. I'll let the IOMMU experts take a closer look (adding Suravee). Thanks. I see Thomas has taken the first two into the tip.git x86/apic branch already, so we're just looking for an ack on the third. Which is this one... From 49ee4fa51b8c06d14b7c4c74d15a7d76f865a8ea Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Nov 2020 12:09:01 + Subject: [PATCH] iommu/amd: Fix IOMMU interrupt generation in X2APIC mode The AMD IOMMU has two modes for generating its own interrupts. The first is very much based on PCI MSI, and can be configured by Linux precisely that way. But like legacy unmapped PCI MSI it's limited to 8 bits of APIC ID. The second method does not use PCI MSI at all in hardawre, and instead configures the INTCAPXT registers in the IOMMU directly with the APIC ID and vector. In the latter case, the IOMMU driver would still use pci_enable_msi(), read back (through MMIO) the MSI message that Linux wrote to the PCI MSI table, then swizzle those bits into the appropriate register. Historically, this worked because__irq_compose_msi_msg() would silently generate an invalid MSI message with the high bits of the APIC ID in the high bits of the MSI address. That hack was intended only for the Intel IOMMU, and I recently enforced that, introducing a warning in __irq_msi_compose_msg() if it was invoked with an APIC ID above 255. Fix the AMD IOMMU not to depend on that hack any more, by having its own irqdomain and directly putting the bits from the irq_cfg into the right place in its ->activate() method. Fixes: 47bea873cf80 "x86/msi: Only use high bits of MSI address for DMAR unit") Signed-off-by: David Woodhouse I'm still working on testing this series using IO_PAGE_FAULT injection to trigger the IOMMU interrupts. I am still debugging some issues, and I'll keep you updated on the findings. Thanks, Suravee
Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support
Joerg, Please ignore to include the V3. I am working on V4 to resubmit. Thank you, Suravee On 11/11/20 10:10 AM, Suravee Suthikulpanit wrote: Hi Joerg, Do you have any update on this series? Thanks, Suravee On 11/2/20 10:16 AM, Suravee Suthikulpanit wrote: Joerg, You mentioned to remind you to pull this in to linux-next. Thanks, Suravee On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data. - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (14): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Introduce IOMMU flush callbacks iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/io_pgtable.c | 564 drivers/iommu/amd/iommu.c | 646 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 691 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c
Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support
Hi Joerg, Do you have any update on this series? Thanks, Suravee On 11/2/20 10:16 AM, Suravee Suthikulpanit wrote: Joerg, You mentioned to remind you to pull this in to linux-next. Thanks, Suravee On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data. - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (14): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Introduce IOMMU flush callbacks iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/io_pgtable.c | 564 drivers/iommu/amd/iommu.c | 646 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 691 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c
[PATCH v2] iommu/amd: Enforce 4k mapping for certain IOMMU data structures
AMD IOMMU requires 4k-aligned pages for the event log, the PPR log, and the completion wait write-back regions. However, when allocating the pages, they could be part of large mapping (e.g. 2M) page. This causes #PF due to the SNP RMP hardware enforces the check based on the page level for these data structures. So, fix by calling set_memory_4k() on the allocated pages. Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore") Cc: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/init.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 82e4af8f09bb..23a790f8f550 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -672,11 +673,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, +gfp_t gfp, size_t size) +{ + int order = get_order(size); + void *buf = (void *)__get_free_pages(gfp, order); + + if (buf && + iommu_feature(iommu, FEATURE_SNP) && + set_memory_4k((unsigned long)buf, (1 << order))) { + free_pages((unsigned long)buf, order); + buf = NULL; + } + + return buf; +} + /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; } @@ -715,8 +732,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu) /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); + iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + PPR_LOG_SIZE); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -838,7 +855,7 @@ static int iommu_init_ga(struct amd_iommu *iommu) static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL); + iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); return iommu->cmd_sem ? 0 : -ENOMEM; } -- 2.17.1
Re: [PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support
Joerg, You mentioned to remind you to pull this in to linux-next. Thanks, Suravee On 10/4/20 8:45 AM, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data. - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (14): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Introduce IOMMU flush callbacks iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/io_pgtable.c | 564 drivers/iommu/amd/iommu.c | 646 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 691 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c
[PATCH] iommu/amd: Enforce 4k mapping for certain IOMMU data structures
AMD IOMMU requires 4k-aligned pages for the event log, the PPR log, and the completion wait write-back regions. However, when allocating the pages, they could be part of large mapping (e.g. 2M) page. This causes #PF due to the SNP RMP hardware enforces the check based on the page level for these data structures. So, fix by calling set_memory_4k() on the allocated pages. Fixes: commit c69d89aff393 ("iommu/amd: Use 4K page for completion wait write-back semaphore") Cc: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/init.c | 22 +- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 82e4af8f09bb..75dc30226a7c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -672,11 +673,22 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +static void *__init iommu_alloc_4k_pages(gfp_t gfp, size_t size) +{ + void *buf; + int order = get_order(size); + + buf = (void *)__get_free_pages(gfp, order); + if (!buf) + return buf; + return set_memory_4k((unsigned long)buf, (1 << order)) ? NULL : buf; +} + /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO, + EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; } @@ -715,8 +727,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu) /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); + iommu->ppr_log = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO, + PPR_LOG_SIZE); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -838,7 +850,7 @@ static int iommu_init_ga(struct amd_iommu *iommu) static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL); + iommu->cmd_sem = iommu_alloc_4k_pages(GFP_KERNEL | __GFP_ZERO, 1); return iommu->cmd_sem ? 0 : -ENOMEM; } -- 2.17.1
Re: [PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries
Hi Joerg, Do you have any concerns regarding this patch? Thanks, Suravee On 10/15/20 9:50 AM, Suravee Suthikulpanit wrote: Certain device drivers allocate IO queues on a per-cpu basis. On AMD EPYC platform, which can support up-to 256 cpu threads, this can exceed the current MAX_IRQ_PER_TABLE limit of 256, and result in the error message: AMD-Vi: Failed to allocate IRTE This has been observed with certain NVME devices. AMD IOMMU hardware can actually support upto 512 interrupt remapping table entries. Therefore, update the driver to match the hardware limit. Please note that this also increases the size of interrupt remapping table to 8KB per device when using the 128-bit IRTE format. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30a5d412255a..427484c45589 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT 128 struct irq_remap_table {
[PATCH] iommu/amd: Increase interrupt remapping table limit to 512 entries
Certain device drivers allocate IO queues on a per-cpu basis. On AMD EPYC platform, which can support up-to 256 cpu threads, this can exceed the current MAX_IRQ_PER_TABLE limit of 256, and result in the error message: AMD-Vi: Failed to allocate IRTE This has been observed with certain NVME devices. AMD IOMMU hardware can actually support upto 512 interrupt remapping table entries. Therefore, update the driver to match the hardware limit. Please note that this also increases the size of interrupt remapping table to 8KB per device when using the 128-bit IRTE format. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30a5d412255a..427484c45589 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -406,7 +406,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT128 struct irq_remap_table { -- 2.17.1
Re: [PATCH] KVM: SVM: Initialize prev_ga_tag before use
Paolo, Are there any issues or concerns about this patch? Thank you, Suravee On 10/4/20 6:27 AM, Suravee Suthikulpanit wrote: The function amd_ir_set_vcpu_affinity makes use of the parameter struct amd_iommu_pi_data.prev_ga_tag to determine if it should delete struct amd_iommu_pi_data from a list when not running in AVIC mode. However, prev_ga_tag is initialized only when AVIC is enabled. The non-zero uninitialized value can cause unintended code path, which ends up making use of the struct vcpu_svm.ir_list and ir_list_lock without being initialized (since they are intended only for the AVIC case). This triggers NULL pointer dereference bug in the function vm_ir_list_del with the following call trace: svm_update_pi_irte+0x3c2/0x550 [kvm_amd] ? proc_create_single_data+0x41/0x50 kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm] __connect+0x5f/0xb0 [irqbypass] irq_bypass_register_producer+0xf8/0x120 [irqbypass] vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci] vfio_msi_set_block+0x77/0xe0 [vfio_pci] vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci] vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci] vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci] ? alloc_file_pseudo+0xa5/0x100 vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] ? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] __x64_sys_ioctl+0x96/0xd0 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Therefore, initialize prev_ga_tag to zero before use. This should be safe because ga_tag value 0 is invalid (see function avic_vm_init). Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into avic_init_vcpu()") Signed-off-by: Suravee Suthikulpanit --- arch/x86/kvm/svm/avic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index ac830cd50830..381d22daa4ac 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -868,6 +868,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * - Tell IOMMU to use legacy mode for this interrupt. * - Retrieve ga_tag of prior interrupt remapping data. */ + pi.prev_ga_tag = 0; pi.is_guest_mode = false; ret = irq_set_vcpu_affinity(host_irq, &pi);
[PATCH v3 10/14] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
To simplify the fetch_pte function. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 13 +++-- drivers/iommu/amd/iommu.c | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2059e64fdc53..69996e57fae2 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom, extern unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long page_size); -extern u64 *fetch_pte(struct protection_domain *domain, +extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 989db64a89a7..93ff8cb452ed 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct protection_domain *domain, +u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size) { @@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain, *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->iop.mode)) + if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL; - level = domain->iop.mode - 1; - pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte= &pgtable->root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long iova, unsigned long size) { + struct io_pgtable_ops *ops = &dom->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; @@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped = 0; while (unmapped < size) { - pte = fetch_pte(dom, iova, &unmap_size); - + pte = fetch_pte(pgtable, iova, &unmap_size); if (pte) { int i, count; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3f6ede1e572c..87cea1cde414 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; if (domain->iop.mode == PAGE_MODE_NONE) return iova; - pte = fetch_pte(domain, iova, &pte_pgsize); + pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; -- 2.17.1
[PATCH v3 13/14] iommu/amd: Introduce IOMMU flush callbacks
Add TLB flush callback functions, which are used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index d8b329aa0bb2..3c2faa47ea5d 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -514,6 +514,33 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo /* * */ +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_flush_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, +unsigned long iova, size_t granule, +void *cookie) +{ +} + +const struct iommu_flush_ops v1_flush_ops = { + .tlb_flush_all = v1_tlb_flush_all, + .tlb_flush_walk = v1_tlb_flush_walk, + .tlb_flush_leaf = v1_tlb_flush_leaf, + .tlb_add_page = v1_tlb_add_page, +}; + static void v1_free_pgtable(struct io_pgtable *iop) { } @@ -526,6 +553,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo pgtable->iop.ops.unmap= iommu_v1_unmap_page; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + cfg->tlb = &v1_flush_ops; + return &pgtable->iop; } -- 2.17.1
[PATCH v3 11/14] iommu/amd: Introduce iommu_v1_iova_to_phys
This implements iova_to_phys for AMD IOMMU v1 pagetable, which will be used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 22 ++ drivers/iommu/amd/iommu.c | 16 +--- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 93ff8cb452ed..7841e5e1e563 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, return unmapped; } +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + /* * */ @@ -505,6 +525,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + return &pgtable->iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 87cea1cde414..9a1a16031e00 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - unsigned long offset_mask, pte_pgsize; - u64 *pte, __pte; - if (domain->iop.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(pgtable, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) -- 2.17.1
[PATCH v3 06/14] iommu/amd: Move IO page table related functions
Preparing to migrate to use IO page table framework. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 18 ++ drivers/iommu/amd/io_pgtable.c | 473 drivers/iommu/amd/iommu.c | 476 + 3 files changed, 493 insertions(+), 474 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8b7be9171030..ee7ff4d827e1 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -122,4 +122,22 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +/* TODO: These are temporary and will be removed once fully transition */ +extern void free_pagetable(struct domain_pgtable *pgtable); +extern int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + unsigned long page_size, + int prot, + gfp_t gfp); +extern unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size); +extern u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size); +extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, +struct domain_pgtable *pgtable); +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, +u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6b2de9e467d9..c11355afe624 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -23,6 +23,479 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, +unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/ + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + / + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);
[PATCH v3 14/14] iommu/amd: Adopt IO page table framework
Switch to using IO page table framework for AMD IOMMU v1 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/iommu.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 77f44b927ae7..6f8316206fb8 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,22 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) return ret; } +struct io_pgtable_ops * +amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[dev_data->devid]; + + domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = AMD_IOMMU_PGSIZES, + .ias= IOMMU_IN_ADDR_BIT_SIZE, + .oas= IOMMU_OUT_ADDR_BIT_SIZE, + .iommu_dev = &iommu->dev->dev, + }; + + return alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, domain); +} + /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -1580,6 +1597,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) static int attach_device(struct device *dev, struct protection_domain *domain) { + struct io_pgtable_ops *pgtbl_ops; struct iommu_dev_data *dev_data; struct pci_dev *pdev; unsigned long flags; @@ -1623,6 +1641,12 @@ static int attach_device(struct device *dev, skip_ats_check: ret = 0; + pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain); + if (!pgtbl_ops) { + ret = -ENOMEM; + goto out; + } + do_attach(dev_data, domain); /* @@ -1958,6 +1982,8 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) if (domain->dev_cnt > 0) cleanup_domain(domain); + free_io_pgtable_ops(&domain->iop.iop.ops); + BUG_ON(domain->dev_cnt != 0); if (!dom) -- 2.17.1
[PATCH v3 12/14] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
These implement map and unmap for AMD IOMMU v1 pagetable, which will be used by the IO pagetable framework. Also clean up unused extern function declarations. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 - drivers/iommu/amd/io_pgtable.c | 25 - drivers/iommu/amd/iommu.c | 7 --- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 69996e57fae2..2e8dc2a1ec0f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -/* TODO: These are temporary and will be removed once fully transition */ -extern int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp); -extern unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size); -extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 7841e5e1e563..d8b329aa0bb2 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size) +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) { int level; u64 *pte; @@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -int iommu_map_page(struct protection_domain *dom, - unsigned long iova, - unsigned long paddr, - unsigned long size, - int prot, - gfp_t gfp) +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); struct page *freelist = NULL; bool updated = false; u64 __pte, *pte; @@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom, return ret; } -unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long iova, - unsigned long size) +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) { - struct io_pgtable_ops *ops = &dom->iop.iop.ops; struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; @@ -525,6 +522,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.unmap= iommu_v1_unmap_page; pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pgtable->iop; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9a1a16031e00..77f44b927ae7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; int ret; @@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - + ret = ops->map(o
[PATCH v3 09/14] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops
There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 31 +++ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6c063d2c8bf0..989db64a89a7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * and full 64 bit address spaces. */ int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, + unsigned long iova, + unsigned long paddr, + unsigned long size, int prot, gfp_t gfp) { @@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); ret = -ENOMEM; if (!pte) @@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom, updated = true; if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom, } unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) + unsigned long iova, + unsigned long size) { unsigned long long unmapped; unsigned long unmap_size; u64 *pte; - BUG_ON(!is_power_of_2(page_size)); + BUG_ON(!is_power_of_2(size)); unmapped = 0; - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); + while (unmapped < size) { + pte = fetch_pte(dom, iova, &unmap_size); if (pte) { int i, count; @@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, pte[i] = 0ULL; } - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } -- 2.17.1
[PATCH v3 02/14] iommu/amd: Prepare for generic IO page table framework
Add initial hook up code to implement generic IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 35 +++ drivers/iommu/amd/io_pgtable.c | 43 + drivers/iommu/amd/iommu.c | 10 --- drivers/iommu/io-pgtable.c | 3 ++ include/linux/io-pgtable.h | 2 ++ 7 files changed, 85 insertions(+), 11 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index f696ac7c5f89..e3ac3e57e507 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -461,6 +475,26 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ +struct protection_domain, iop) + +#define io_pgtable_cfg_to_data(x) \ + container_of((x), struct amd_io_pgtable, pgtbl_cfg) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -469,6 +503,7 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ atomic64_t pt_root; /* pgtable root and pgtable mode */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index ..6b2de9e467d9 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt)pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +/* + * + */ +static void v1_free_pgtable(struct io_pgtable *iop) +{ +} + +static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + + return &pgtable->iop; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { + .alloc = v1_alloc_pgtable, + .free = v1_free_pgtable, +}; diff
[PATCH v3 08/14] iommu/amd: Remove amd_iommu_domain_get_pgtable
Since the IO page table root and mode parameters have been moved into the struct amd_io_pg, the function is no longer needed. Therefore, remove it along with the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/amd_iommu_types.h | 6 - drivers/iommu/amd/io_pgtable.c | 36 ++--- drivers/iommu/amd/iommu.c | 34 --- 4 files changed, 19 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8dff7d85be79..2059e64fdc53 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -101,6 +101,8 @@ static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } static inline @@ -135,8 +137,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom, extern u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size); -extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, -struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 80b5c34357ed..de3fe9433080 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -514,12 +514,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 23e82da2dea8..6c063d2c8bf0 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -184,30 +184,27 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { - struct domain_pgtable pgtable; unsigned long flags; bool ret = true; u64 *pte; spin_lock_irqsave(&domain->lock, flags); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) goto out; ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); - pgtable.root = pte; - pgtable.mode += 1; + domain->iop.root = pte; + domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); amd_iommu_domain_flush_complete(domain); @@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain *domain, * Device Table needs to be updated and flushed before the new root can * be published. */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); ret = true; @@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { - struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { /* * Return an error if there is no memory to update the * page-table. */ if (!increase_address_space(domain, address, gfp)) return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); } - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain-
[PATCH v3 05/14] iommu/amd: Declare functions as extern
And move declaration to header file so that they can be included across multiple files. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 39 +-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 22ecacb71675..8b7be9171030 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 09da37c4c9c4..f91f35edb7ba 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -88,7 +88,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); / @@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1481,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; amd_iommu_update_and_flush_device_table(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can @@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom, * Updates and flushing already happened in * increase_address_space(). */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { struct domain_pgtable pgtable; @@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); -
[PATCH v3 04/14] iommu/amd: Convert to using amd_io_pgtable
Make use of the new struct amd_io_pgtable in preparation to remove the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 25 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index da6e09657e00..22ecacb71675 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c8b8619cc744..09da37c4c9c4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); / * @@ -1482,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); domain_flush_complete(domain); /* @@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * */ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } @@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain *domain) /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); -- 2.17.1
[PATCH v3 01/14] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
Move the function to header file to allow inclusion in other files. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 + drivers/iommu/amd/iommu.c | 10 -- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 57309716fd18..97cdb235ce69 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -93,6 +93,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->pt_root, root); +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index db4fb840c59c..e92b3f744292 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -162,16 +162,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ } -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { -- 2.17.1
[PATCH v3 07/14] iommu/amd: Restructure code for freeing page table
Introduce amd_iommu_free_pgtable helper function, which consolidates logic for freeing page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 12 +++- drivers/iommu/amd/iommu.c | 19 ++- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index ee7ff4d827e1..8dff7d85be79 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif /* TODO: These are temporary and will be removed once fully transition */ -extern void free_pagetable(struct domain_pgtable *pgtable); extern int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, @@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index c11355afe624..23e82da2dea8 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -136,14 +136,24 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -void free_pagetable(struct domain_pgtable *pgtable) +void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable) { + struct protection_domain *dom; struct page *freelist = NULL; unsigned long root; if (pgtable->mode == PAGE_MODE_NONE) return; + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || pgtable->mode > PAGE_MODE_6_LEVEL); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4d65f64236b6..cbbea7b952fb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); kfree(domain); } @@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); spin_unlock_irqrestore(&domain->lock, flags); } -- 2.17.1
[PATCH v3 00/14] iommu/amd: Add Generic IO Page Table Framework Support
The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V2 (https://lore.kernel.org/lkml/835c0d46-ed96-9fbe-856a-777dcffac...@amd.com/T/#t) - Patch 2/14: Introduce helper function io_pgtable_cfg_to_data. - Patch 13/14: Put back the struct iommu_flush_ops since patch v2 would run into NULL pointer bug when calling free_io_pgtable_ops if not defined. Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (14): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Introduce IOMMU flush callbacks iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 43 +- drivers/iommu/amd/io_pgtable.c | 564 drivers/iommu/amd/iommu.c | 646 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 691 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c -- 2.17.1
[PATCH v3 03/14] iommu/amd: Move pt_root to to struct amd_io_pgtable
To better organize the data structure since it contains IO page table related information. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 97cdb235ce69..da6e09657e00 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->pt_root, root); + atomic64_set(&domain->iop.pt_root, root); } static inline diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index e3ac3e57e507..80b5c34357ed 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -493,6 +493,7 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ }; /* @@ -506,7 +507,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx;/* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2b7eb51dcbb8..c8b8619cc744 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable) { - u64 pt_root = atomic64_read(&domain->pt_root); + u64 pt_root = atomic64_read(&domain->iop.pt_root); pgtable->root = (u64 *)(pt_root & PAGE_MASK); pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -- 2.17.1
Re: [PATCH] KVM: SVM: Initialize ir_list and ir_list_lock regardless of AVIC enablement
Paolo, On 9/28/20 3:01 PM, Paolo Bonzini wrote: On 28/09/20 07:53, Suravee Suthikulpanit wrote: Hi, Are there any issues or concerns about this patch? Yes, sorry I haven't replied yet. Looks like Linus is doing an -rc8 so there's plenty of time to have it in 5.9. The thing I'm wondering is, why is svm_update_pi_irte doing anything if you don't have AVIC enabled? In other word, this might not be the root cause of the bug. You always get to the "else" branch of the loop of course, and I'm not sure how irq_set_vcpu_affinity returns something with pi.prev_ga_tag set. You are right. pi_prev_ga_tag needs to be initialized before used (in case AVIC is not enabled). I have already sent out another patch to properly fix the issue instead with subject (KVM: SVM: Initialize prev_ga_tag before use). Thanks, Suravee
[PATCH] KVM: SVM: Initialize prev_ga_tag before use
The function amd_ir_set_vcpu_affinity makes use of the parameter struct amd_iommu_pi_data.prev_ga_tag to determine if it should delete struct amd_iommu_pi_data from a list when not running in AVIC mode. However, prev_ga_tag is initialized only when AVIC is enabled. The non-zero uninitialized value can cause unintended code path, which ends up making use of the struct vcpu_svm.ir_list and ir_list_lock without being initialized (since they are intended only for the AVIC case). This triggers NULL pointer dereference bug in the function vm_ir_list_del with the following call trace: svm_update_pi_irte+0x3c2/0x550 [kvm_amd] ? proc_create_single_data+0x41/0x50 kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm] __connect+0x5f/0xb0 [irqbypass] irq_bypass_register_producer+0xf8/0x120 [irqbypass] vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci] vfio_msi_set_block+0x77/0xe0 [vfio_pci] vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci] vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci] vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci] ? alloc_file_pseudo+0xa5/0x100 vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] ? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] __x64_sys_ioctl+0x96/0xd0 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Therefore, initialize prev_ga_tag to zero before use. This should be safe because ga_tag value 0 is invalid (see function avic_vm_init). Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into avic_init_vcpu()") Signed-off-by: Suravee Suthikulpanit --- arch/x86/kvm/svm/avic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index ac830cd50830..381d22daa4ac 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -868,6 +868,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * - Tell IOMMU to use legacy mode for this interrupt. * - Retrieve ga_tag of prior interrupt remapping data. */ + pi.prev_ga_tag = 0; pi.is_guest_mode = false; ret = irq_set_vcpu_affinity(host_irq, &pi); -- 2.17.1
Re: [PATCH v2 00/13] iommu/amd: Add Generic IO Page Table Framework Support
I found an issue w/ this series. Please ignore. I'll send out V3. Regards, Suravee On 10/2/20 7:28 PM, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (13): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 40 +- drivers/iommu/amd/io_pgtable.c | 534 +++ drivers/iommu/amd/iommu.c | 644 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 656 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c
[PATCH v2 08/13] iommu/amd: Remove amd_iommu_domain_get_pgtable
Since the IO page table root and mode parameters have been moved into the struct amd_io_pg, the function is no longer needed. Therefore, remove it along with the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/amd_iommu_types.h | 6 - drivers/iommu/amd/io_pgtable.c | 36 ++--- drivers/iommu/amd/iommu.c | 34 --- 4 files changed, 19 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8dff7d85be79..2059e64fdc53 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -101,6 +101,8 @@ static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } static inline @@ -135,8 +137,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom, extern u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size); -extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, -struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5d53b7bec256..a07af389eae1 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -511,12 +511,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 0c886419166b..a2acd7e85ec3 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -184,30 +184,27 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { - struct domain_pgtable pgtable; unsigned long flags; bool ret = true; u64 *pte; spin_lock_irqsave(&domain->lock, flags); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) goto out; ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); - pgtable.root = pte; - pgtable.mode += 1; + domain->iop.root = pte; + domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); amd_iommu_domain_flush_complete(domain); @@ -215,7 +212,7 @@ static bool increase_address_space(struct protection_domain *domain, * Device Table needs to be updated and flushed before the new root can * be published. */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); ret = true; @@ -232,29 +229,23 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { - struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { /* * Return an error if there is no memory to update the * page-table. */ if (!increase_address_space(domain, address, gfp)) return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); } - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain-
[PATCH v2 06/13] iommu/amd: Move IO page table related functions
Preparing to migrate to use IO page table framework. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 18 ++ drivers/iommu/amd/io_pgtable.c | 473 drivers/iommu/amd/iommu.c | 476 + 3 files changed, 493 insertions(+), 474 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8b7be9171030..ee7ff4d827e1 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -122,4 +122,22 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +/* TODO: These are temporary and will be removed once fully transition */ +extern void free_pagetable(struct domain_pgtable *pgtable); +extern int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + unsigned long page_size, + int prot, + gfp_t gfp); +extern unsigned long iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long page_size); +extern u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, + unsigned long *page_size); +extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, +struct domain_pgtable *pgtable); +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, +u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index f123ab6e8a51..7fd3dd9db197 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -23,6 +23,479 @@ #include "amd_iommu_types.h" #include "amd_iommu.h" +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, +unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/ + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + / + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);
[PATCH v2 07/13] iommu/amd: Restructure code for freeing page table
Introduce amd_iommu_free_pgtable helper function, which consolidates logic for freeing page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 12 +++- drivers/iommu/amd/iommu.c | 19 ++- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index ee7ff4d827e1..8dff7d85be79 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif /* TODO: These are temporary and will be removed once fully transition */ -extern void free_pagetable(struct domain_pgtable *pgtable); extern int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, @@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 7fd3dd9db197..0c886419166b 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -136,14 +136,24 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -void free_pagetable(struct domain_pgtable *pgtable) +void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable) { + struct protection_domain *dom; struct page *freelist = NULL; unsigned long root; if (pgtable->mode == PAGE_MODE_NONE) return; + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || pgtable->mode > PAGE_MODE_6_LEVEL); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4d65f64236b6..cbbea7b952fb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); kfree(domain); } @@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); spin_unlock_irqrestore(&domain->lock, flags); } -- 2.17.1
[PATCH v2 04/13] iommu/amd: Convert to using amd_io_pgtable
Make use of the new struct amd_io_pgtable in preparation to remove the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 25 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index da6e09657e00..22ecacb71675 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c8b8619cc744..09da37c4c9c4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); / * @@ -1482,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); domain_flush_complete(domain); /* @@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * */ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } @@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain *domain) /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); -- 2.17.1
[PATCH v2 02/13] iommu/amd: Prepare for generic IO page table framework
Add initial hook up code to implement generic IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 32 + drivers/iommu/amd/io_pgtable.c | 43 + drivers/iommu/amd/iommu.c | 10 --- drivers/iommu/io-pgtable.c | 3 ++ include/linux/io-pgtable.h | 2 ++ 7 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index f696ac7c5f89..77cd8d966fbc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -461,6 +475,23 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ +struct protection_domain, iop) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -469,6 +500,7 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ atomic64_t pt_root; /* pgtable root and pgtable mode */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index ..f123ab6e8a51 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt)pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +/* + * + */ +static void v1_free_pgtable(struct io_pgtable *iop) +{ +} + +static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct protection_domain *pdom = (struct protection_domain *)cookie; + + return &pdom->iop.iop; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { + .alloc = v1_alloc_pgtable, + .free = v1_free_pgtable, +}; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e92b3f744292..2b7e
[PATCH v2 09/13] iommu/amd: Rename variables to be consistent with struct io_pgtable_ops
There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 31 +++ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index a2acd7e85ec3..ff1294e8729d 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -393,9 +393,9 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * and full 64 bit address spaces. */ int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, + unsigned long iova, + unsigned long paddr, + unsigned long size, int prot, gfp_t gfp) { @@ -404,15 +404,15 @@ int iommu_map_page(struct protection_domain *dom, u64 __pte, *pte; int ret, i, count; - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) goto out; - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); ret = -ENOMEM; if (!pte) @@ -425,10 +425,10 @@ int iommu_map_page(struct protection_domain *dom, updated = true; if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -462,20 +462,19 @@ int iommu_map_page(struct protection_domain *dom, } unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) + unsigned long iova, + unsigned long size) { unsigned long long unmapped; unsigned long unmap_size; u64 *pte; - BUG_ON(!is_power_of_2(page_size)); + BUG_ON(!is_power_of_2(size)); unmapped = 0; - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); + while (unmapped < size) { + pte = fetch_pte(dom, iova, &unmap_size); if (pte) { int i, count; @@ -485,7 +484,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, pte[i] = 0ULL; } - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; + iova = (iova & ~(unmap_size - 1)) + unmap_size; unmapped += unmap_size; } -- 2.17.1
[PATCH v2 11/13] iommu/amd: Introduce iommu_v1_iova_to_phys
This implements iova_to_phys for AMD IOMMU v1 pagetable, which will be used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 21 + drivers/iommu/amd/iommu.c | 16 +--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1729c303bae5..bbbf18d2514a 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -494,6 +494,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, return unmapped; } +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + /* * */ @@ -505,6 +525,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct protection_domain *pdom = (struct protection_domain *)cookie; + pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pdom->iop.iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 87cea1cde414..9a1a16031e00 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - unsigned long offset_mask, pte_pgsize; - u64 *pte, __pte; - if (domain->iop.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(pgtable, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) -- 2.17.1
[PATCH v2 13/13] iommu/amd: Adopt IO page table framework
Switch to using IO page table framework for AMD IOMMU v1 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/iommu.c | 24 1 file changed, 24 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 77f44b927ae7..c28949be3442 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,20 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) return ret; } +struct io_pgtable_ops * +amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ + domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = AMD_IOMMU_PGSIZES, + .ias= IOMMU_IN_ADDR_BIT_SIZE, + .oas= IOMMU_OUT_ADDR_BIT_SIZE, + .iommu_dev = &dev_data->pdev->dev, + }; + + return alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, domain); +} + /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -1580,6 +1595,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) static int attach_device(struct device *dev, struct protection_domain *domain) { + struct io_pgtable_ops *pgtbl_ops; struct iommu_dev_data *dev_data; struct pci_dev *pdev; unsigned long flags; @@ -1623,6 +1639,12 @@ static int attach_device(struct device *dev, skip_ats_check: ret = 0; + pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain); + if (!pgtbl_ops) { + ret = -ENOMEM; + goto out; + } + do_attach(dev_data, domain); /* @@ -1958,6 +1980,8 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) if (domain->dev_cnt > 0) cleanup_domain(domain); + free_io_pgtable_ops(&domain->iop.iop.ops); + BUG_ON(domain->dev_cnt != 0); if (!dom) -- 2.17.1
[PATCH v2 01/13] iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline
Move the function to header file to allow inclusion in other files. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 + drivers/iommu/amd/iommu.c | 10 -- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 57309716fd18..97cdb235ce69 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -93,6 +93,19 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->pt_root, root); +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index db4fb840c59c..e92b3f744292 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -162,16 +162,6 @@ static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ } -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode) { -- 2.17.1
[PATCH v2 10/13] iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable
To simplify the fetch_pte function. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 13 +++-- drivers/iommu/amd/iommu.c | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2059e64fdc53..69996e57fae2 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom, extern unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long page_size); -extern u64 *fetch_pte(struct protection_domain *domain, +extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index ff1294e8729d..1729c303bae5 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,7 +317,7 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct protection_domain *domain, +u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size) { @@ -326,11 +326,11 @@ u64 *fetch_pte(struct protection_domain *domain, *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->iop.mode)) + if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL; - level = domain->iop.mode - 1; - pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte= &pgtable->root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -465,6 +465,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long iova, unsigned long size) { + struct io_pgtable_ops *ops = &dom->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; @@ -474,8 +476,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped = 0; while (unmapped < size) { - pte = fetch_pte(dom, iova, &unmap_size); - + pte = fetch_pte(pgtable, iova, &unmap_size); if (pte) { int i, count; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3f6ede1e572c..87cea1cde414 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; if (domain->iop.mode == PAGE_MODE_NONE) return iova; - pte = fetch_pte(domain, iova, &pte_pgsize); + pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; -- 2.17.1
[PATCH v2 12/13] iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
These implement map and unmap for AMD IOMMU v1 pagetable, which will be used by the IO pagetable framework. Also clean up unused extern function declarations. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 - drivers/iommu/amd/io_pgtable.c | 25 - drivers/iommu/amd/iommu.c | 7 --- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 69996e57fae2..2e8dc2a1ec0f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -/* TODO: These are temporary and will be removed once fully transition */ -extern int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp); -extern unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size); -extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index bbbf18d2514a..a5f8d80a9d35 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -317,9 +317,9 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size) +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) { int level; u64 *pte; @@ -392,13 +392,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -int iommu_map_page(struct protection_domain *dom, - unsigned long iova, - unsigned long paddr, - unsigned long size, - int prot, - gfp_t gfp) +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); struct page *freelist = NULL; bool updated = false; u64 __pte, *pte; @@ -461,11 +458,11 @@ int iommu_map_page(struct protection_domain *dom, return ret; } -unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long iova, - unsigned long size) +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) { - struct io_pgtable_ops *ops = &dom->iop.iop.ops; struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; @@ -525,6 +522,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct protection_domain *pdom = (struct protection_domain *)cookie; + pdom->iop.iop.ops.map = iommu_v1_map_page; + pdom->iop.iop.ops.unmap= iommu_v1_unmap_page; pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pdom->iop.iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9a1a16031e00..77f44b927ae7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; int ret; @@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - + ret = op
[PATCH v2 05/13] iommu/amd: Declare functions as extern
And move declaration to header file so that they can be included across multiple files. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 39 +-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 22ecacb71675..8b7be9171030 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 09da37c4c9c4..f91f35edb7ba 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -88,7 +88,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); / @@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1481,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; amd_iommu_update_and_flush_device_table(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can @@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom, * Updates and flushing already happened in * increase_address_space(). */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { struct domain_pgtable pgtable; @@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); -
[PATCH v2 03/13] iommu/amd: Move pt_root to to struct amd_io_pgtable
To better organize the data structure since it contains IO page table related information. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 97cdb235ce69..da6e09657e00 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->pt_root, root); + atomic64_set(&domain->iop.pt_root, root); } static inline diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 77cd8d966fbc..5d53b7bec256 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -490,6 +490,7 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ }; /* @@ -503,7 +504,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx;/* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2b7eb51dcbb8..c8b8619cc744 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable) { - u64 pt_root = atomic64_read(&domain->pt_root); + u64 pt_root = atomic64_read(&domain->iop.pt_root); pgtable->root = (u64 *)(pt_root & PAGE_MASK); pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -- 2.17.1
[PATCH v2 00/13] iommu/amd: Add Generic IO Page Table Framework Support
The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). This series refactors the current implementation of AMD IOMMU v1 page table to adopt the framework. There should be no functional change. Subsequent series will introduce support for the AMD IOMMU v2 page table. Thanks, Suravee Change from V1 (https://lkml.org/lkml/2020/9/23/251) - Do not specify struct io_pgtable_cfg.coherent_walk, since it is not currently used. (per Robin) - Remove unused struct iommu_flush_ops. (patch 2/13) - Move amd_iommu_setup_io_pgtable_ops to iommu.c instead of io_pgtable.c patch 13/13) Suravee Suthikulpanit (13): iommu/amd: Re-define amd_iommu_domain_encode_pgtable as inline iommu/amd: Prepare for generic IO page table framework iommu/amd: Move pt_root to to struct amd_io_pgtable iommu/amd: Convert to using amd_io_pgtable iommu/amd: Declare functions as extern iommu/amd: Move IO page table related functions iommu/amd: Restructure code for freeing page table iommu/amd: Remove amd_iommu_domain_get_pgtable iommu/amd: Rename variables to be consistent with struct io_pgtable_ops iommu/amd: Refactor fetch_pte to use struct amd_io_pgtable iommu/amd: Introduce iommu_v1_iova_to_phys iommu/amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page iommu/amd: Adopt IO page table framework drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 22 + drivers/iommu/amd/amd_iommu_types.h | 40 +- drivers/iommu/amd/io_pgtable.c | 534 +++ drivers/iommu/amd/iommu.c | 644 +++- drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 8 files changed, 656 insertions(+), 592 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c -- 2.17.1
Re: [PATCH 00/13] iommu: amd: Add Generic IO Page Table Framework Support
Joerg, On 10/1/20 7:59 PM, Joerg Roedel wrote: On Thu, Sep 24, 2020 at 05:50:37PM +0700, Suravee Suthikulpanit wrote: On 9/24/20 5:34 PM, Joerg Roedel wrote: Hi Suravee, On Wed, Sep 23, 2020 at 10:14:29AM +, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). Is there a reason you created your own framework, there is already an io-pgtable framework for ARM, maybe that can be reused? Actually, this is the same framework used by ARM codes. Sorry if the description is not clear. Ah, right, thanks. I think this should spend some time in linux-next before going upstream. Can you please remind me after the next merge window to pick it up? Thanks, Joerg Sure. Let me send out v2 for this with some more clean up. Thanks, Suravee
Re: [PATCH] KVM: SVM: Initialize ir_list and ir_list_lock regardless of AVIC enablement
Hi, Are there any issues or concerns about this patch? Thank you, Suravee On 9/22/20 3:44 PM, Suravee Suthikulpanit wrote: The struct vcpu_svm.ir_list and ir_list_lock are being accessed even when AVIC is not enabled, while current code only initialize the list and the lock only when AVIC is enabled. This ended up trigger NULL pointer dereference bug in the function vm_ir_list_del with the following call trace: svm_update_pi_irte+0x3c2/0x550 [kvm_amd] ? proc_create_single_data+0x41/0x50 kvm_arch_irq_bypass_add_producer+0x40/0x60 [kvm] __connect+0x5f/0xb0 [irqbypass] irq_bypass_register_producer+0xf8/0x120 [irqbypass] vfio_msi_set_vector_signal+0x1de/0x2d0 [vfio_pci] vfio_msi_set_block+0x77/0xe0 [vfio_pci] vfio_pci_set_msi_trigger+0x25c/0x2f0 [vfio_pci] vfio_pci_set_irqs_ioctl+0x88/0xb0 [vfio_pci] vfio_pci_ioctl+0x2ea/0xed0 [vfio_pci] ? alloc_file_pseudo+0xa5/0x100 vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] ? vfio_device_fops_unl_ioctl+0x26/0x30 [vfio] __x64_sys_ioctl+0x96/0xd0 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Therefore, move the initialziation code before checking for AVIC enabled so that it is always excuted. Fixes: dfa20099e26e ("KVM: SVM: Refactor AVIC vcpu initialization into avic_init_vcpu()") Signed-off-by: Suravee Suthikulpanit --- arch/x86/kvm/svm/avic.c | 2 -- arch/x86/kvm/svm/svm.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index ac830cd50830..1ccf13783785 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -572,8 +572,6 @@ int avic_init_vcpu(struct vcpu_svm *svm) if (ret) return ret; - INIT_LIST_HEAD(&svm->ir_list); - spin_lock_init(&svm->ir_list_lock); svm->dfr_reg = APIC_DFR_FLAT; return ret; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c44f3e9140d5..714d791fe5a5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1225,6 +1225,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x0165; + INIT_LIST_HEAD(&svm->ir_list); + spin_lock_init(&svm->ir_list_lock); + return 0; free_page4:
[PATCH] iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE
When using 128-bit interrupt-remapping table entry (IRTE) (a.k.a GA mode), current driver disables interrupt remapping when it updates the IRTE so that the upper and lower 64-bit values can be updated safely. However, this creates a small window, where the interrupt could arrive and result in IO_PAGE_FAULT (for interrupt) as shown below. IOMMU DriverDevice IRQ === irte.RemapEn=0 ... change IRTEIRQ from device ==> IO_PAGE_FAULT !! ... irte.RemapEn=1 This scenario has been observed when changing irq affinity on a system running I/O-intensive workload, in which the destination APIC ID in the IRTE is updated. Instead, use cmpxchg_double() to update the 128-bit IRTE at once without disabling the interrupt remapping. However, this means several features, which require GA (128-bit IRTE) support will also be affected if cmpxchg16b is not supported (which is unprecedented for AMD processors w/ IOMMU). Cc: sta...@vger.kernel.org Fixes: 880ac60e2538 ("iommu/amd: Introduce interrupt remapping ops structure") Reported-by: Sean Osborne Signed-off-by: Suravee Suthikulpanit Tested-by: Erik Rockstrom Reviewed-by: Joao Martins Link: https://lore.kernel.org/r/20200903093822.52012-3-suravee.suthikulpa...@amd.com Signed-off-by: Joerg Roedel --- Note: This patch is the back-port on top of the stable branch linux-5.4.y for the upstream commit e52d58d54a32 ("iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE") since the original patch does not apply cleanly. drivers/iommu/Kconfig | 2 +- drivers/iommu/amd_iommu.c | 17 + drivers/iommu/amd_iommu_init.c | 21 +++-- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 390568afee9f..fc0160e8ed33 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -138,7 +138,7 @@ config AMD_IOMMU select PCI_PASID select IOMMU_API select IOMMU_IOVA - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fa91d856a43e..7b724f7b27a9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3873,6 +3873,7 @@ static int alloc_irq_index(u16 devid, int count, bool align, static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, struct amd_ir_data *data) { + bool ret; struct irq_remap_table *table; struct amd_iommu *iommu; unsigned long flags; @@ -3890,10 +3891,18 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, entry = (struct irte_ga *)table->table; entry = &entry[index]; - entry->lo.fields_remap.valid = 0; - entry->hi.val = irte->hi.val; - entry->lo.val = irte->lo.val; - entry->lo.fields_remap.valid = 1; + + ret = cmpxchg_double(&entry->lo.val, &entry->hi.val, +entry->lo.val, entry->hi.val, +irte->lo.val, irte->hi.val); + /* +* We use cmpxchg16 to atomically update the 128-bit IRTE, +* and it cannot be updated by the hardware or other processors +* behind us, so the return value of cmpxchg16 should be the +* same as the old value. +*/ + WARN_ON(!ret); + if (data) data->ref = entry; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 135ae5222cf3..31d7e2d4f304 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1522,7 +1522,14 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + + /* +* Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. +* GAM also requires GA mode. Therefore, we need to +* check cmpxchg16b support before enabling it. +*/ + if (!boot_cpu_has(X86_FEATURE_CX16) || + ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1531,8 +1538,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end =
Re: [PATCH 02/13] iommu: amd: Prepare for generic IO page table framework
Robin, On 9/24/20 7:25 PM, Robin Murphy wrote: +struct io_pgtable_ops *amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data, + struct protection_domain *domain) +{ +domain->iop.pgtbl_cfg = (struct io_pgtable_cfg) { +.pgsize_bitmap= AMD_IOMMU_PGSIZES, +.ias= IOMMU_IN_ADDR_BIT_SIZE, +.oas= IOMMU_OUT_ADDR_BIT_SIZE, +.coherent_walk= false, Is that right? Given that you seem to use regular kernel addresses for pagetable pages and don't have any obvious cache maintenance around PTE manipulation, I suspect not ;) > It's fair enough if your implementation doesn't use this and simply assumes coherency, but in that case it would be less confusing to have the driver set it to true for the sake of honesty, or just leave it out entirely - explicitly setting false gives the illusion of being meaningful. AMD IOMMU can be configured to disable snoop for page table walk of a particular device (DTE[SD]=1). However, the current Linux driver does not set this bit, which should assume coherency. We can just leaving this out for now. I can remove this when I send out V2 along w/ other changes. Otherwise, the io-pgtable parts all look OK to me - it's nice to finally fulfil the original intent of not being an Arm-specific thing :D Robin. Thanks, Suravee
Re: [PATCH 00/13] iommu: amd: Add Generic IO Page Table Framework Support
On 9/24/20 5:34 PM, Joerg Roedel wrote: Hi Suravee, On Wed, Sep 23, 2020 at 10:14:29AM +, Suravee Suthikulpanit wrote: The framework allows callable implementation of IO page table. This allows AMD IOMMU driver to switch between different types of AMD IOMMU page tables (e.g. v1 vs. v2). Is there a reason you created your own framework, there is already an io-pgtable framework for ARM, maybe that can be reused? Actually, this is the same framework used by ARM codes. Sorry if the description is not clear. Suravee
[PATCH v2 1/3] iommu: amd: Use 4K page for completion wait write-back semaphore
IOMMU SNP support requires the completion wait write-back semaphore to be implemented using a 4K-aligned page, where the page address is to be programmed into the newly introduced MMIO base/range registers. This new scheme uses a per-iommu atomic variable to store the current semaphore value, which is incremented for every completion wait command. Since this new scheme is also compatible with non-SNP mode, generalize the driver to use 4K page for completion-wait semaphore in both modes. Cc: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 3 ++- drivers/iommu/amd/init.c| 18 ++ drivers/iommu/amd/iommu.c | 23 +++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 30a5d412255a..4c80483e78ec 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -595,7 +595,8 @@ struct amd_iommu { #endif u32 flags; - volatile u64 __aligned(8) cmd_sem; + volatile u64 *cmd_sem; + u64 cmd_sem_val; #ifdef CONFIG_AMD_IOMMU_DEBUGFS /* DebugFS Info */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 445a08d23fed..febc072f2717 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -813,6 +813,19 @@ static int iommu_init_ga(struct amd_iommu *iommu) return ret; } +static int __init alloc_cwwb_sem(struct amd_iommu *iommu) +{ + iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL); + + return iommu->cmd_sem ? 0 : -ENOMEM; +} + +static void __init free_cwwb_sem(struct amd_iommu *iommu) +{ + if (iommu->cmd_sem) + free_page((unsigned long)iommu->cmd_sem); +} + static void iommu_enable_xt(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP @@ -1395,6 +1408,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, static void __init free_iommu_one(struct amd_iommu *iommu) { + free_cwwb_sem(iommu); free_command_buffer(iommu); free_event_buffer(iommu); free_ppr_log(iommu); @@ -1481,6 +1495,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) int ret; raw_spin_lock_init(&iommu->lock); + iommu->cmd_sem_val = 0; /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); @@ -1558,6 +1573,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; + if (alloc_cwwb_sem(iommu)) + return -ENOMEM; + if (alloc_command_buffer(iommu)) return -ENOMEM; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 10e4200d3552..9e9898683537 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -792,11 +792,11 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) * / -static int wait_on_sem(volatile u64 *sem) +static int wait_on_sem(struct amd_iommu *iommu, u64 data) { int i = 0; - while (*sem == 0 && i < LOOP_TIMEOUT) { + while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) { udelay(1); i += 1; } @@ -827,16 +827,16 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } -static void build_completion_wait(struct iommu_cmd *cmd, u64 address) +static void build_completion_wait(struct iommu_cmd *cmd, + struct amd_iommu *iommu, + u64 data) { - u64 paddr = iommu_virt_to_phys((void *)address); - - WARN_ON(address & 0x7ULL); + u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem); memset(cmd, 0, sizeof(*cmd)); cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK; cmd->data[1] = upper_32_bits(paddr); - cmd->data[2] = 1; + cmd->data[2] = data; CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); } @@ -1045,22 +1045,21 @@ static int iommu_completion_wait(struct amd_iommu *iommu) struct iommu_cmd cmd; unsigned long flags; int ret; + u64 data; if (!iommu->need_sync) return 0; - - build_completion_wait(&cmd, (u64)&iommu->cmd_sem); - raw_spin_lock_irqsave(&iommu->lock, flags); - iommu->cmd_sem = 0; + data = ++iommu->cmd_sem_val; + build_completion_wait(&cmd, iommu, data); ret = __iommu_queue_command_sync(iommu, &cmd, false); if (ret) goto out_unlock; - ret = wait_on_sem(&iommu->cmd_sem); + ret = wait_on_sem(iommu, data); out_unlock: raw_spin_unlock_irqrestore(&iommu->lock, flags); -- 2.17.1
[PATCH v2 3/3] iommu: amd: Re-purpose Exclusion range registers to support SNP CWWB
When the IOMMU SNP support bit is set in the IOMMU Extended Features register, hardware re-purposes the following registers: 1. IOMMU Exclusion Base register (MMIO offset 0020h) to Completion Wait Write-Back (CWWB) Base register 2. IOMMU Exclusion Range Limit (MMIO offset 0028h) to Completion Wait Write-Back (CWWB) Range Limit register and requires the IOMMU CWWB semaphore base and range to be programmed in the register offset 0020h and 0028h accordingly. Cc: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/init.c| 26 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1e7966c73707..f696ac7c5f89 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -93,6 +93,7 @@ #define FEATURE_PC (1ULL<<9) #define FEATURE_GAM_VAPIC (1ULL<<21) #define FEATURE_EPHSUP (1ULL<<50) +#define FEATURE_SNP(1ULL<<63) #define FEATURE_PASID_SHIFT32 #define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index febc072f2717..c55df4347487 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -359,6 +359,29 @@ static void iommu_set_exclusion_range(struct amd_iommu *iommu) &entry, sizeof(entry)); } +static void iommu_set_cwwb_range(struct amd_iommu *iommu) +{ + u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); + u64 entry = start & PM_ADDR_MASK; + + if (!iommu_feature(iommu, FEATURE_SNP)) + return; + + /* Note: +* Re-purpose Exclusion base/limit registers for Completion wait +* write-back base/limit. +*/ + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + /* Note: +* Default to 4 Kbytes, which can be specified by setting base +* address equal to the limit address. +*/ + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + /* Programs the physical address of the device table into the IOMMU hardware */ static void iommu_set_device_table(struct amd_iommu *iommu) { @@ -1901,6 +1924,9 @@ static int __init amd_iommu_init_pci(void) ret = iommu_init_pci(iommu); if (ret) break; + + /* Need to setup range after PCI init */ + iommu_set_cwwb_range(iommu); } /* -- 2.17.1
[PATCH v2 0/3] amd : iommu : Initial IOMMU support for SNP
Introducing support for AMD Secure Nested Paging (SNP) with IOMMU, which mainly affects the use of IOMMU Exclusion Base and Range Limit registers. Note that these registers are no longer used by Linux IOMMU driver. Patch 2 and 3 are SNP-specific, and discuss detail of the implementation. In order to support SNP, the current Completion Wait Write-back logic is modified (patch 1/4). This change is independent from SNP. Please see the following white paper for more info on SNP: https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf Changes from V1: (https://lkml.org/lkml/2020/9/16/455) - Patch 2/3: Fix up per Joerg's comments Thank you, Suravee Suravee Suthikulpanit (3): iommu: amd: Use 4K page for completion wait write-back semaphore iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR iommu: amd: Re-purpose Exclusion range registers to support SNP CWWB drivers/iommu/amd/amd_iommu_types.h | 6 +- drivers/iommu/amd/init.c| 44 ++ drivers/iommu/amd/iommu.c | 90 + 3 files changed, 127 insertions(+), 13 deletions(-) -- 2.17.1
[PATCH v2 2/3] iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR
IOMMU SNP support introduces two new IOMMU events: * RMP Page Fault event * RMP Hardware Error event Hence, add reporting functions for these events. Cc: Brijesh Singh Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu_types.h | 2 + drivers/iommu/amd/iommu.c | 67 + 2 files changed, 69 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 4c80483e78ec..1e7966c73707 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -128,6 +128,8 @@ #define EVENT_TYPE_IOTLB_INV_TO0x7 #define EVENT_TYPE_INV_DEV_REQ 0x8 #define EVENT_TYPE_INV_PPR_REQ 0x9 +#define EVENT_TYPE_RMP_FAULT 0xd +#define EVENT_TYPE_RMP_HW_ERR 0xe #define EVENT_DEVID_MASK 0x #define EVENT_DEVID_SHIFT 0 #define EVENT_DOMID_MASK_LO0x diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9e9898683537..ea64fa8a9418 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -486,6 +486,67 @@ static void dump_command(unsigned long phys_addr) pr_err("CMD[%d]: %08x\n", i, cmd->data[i]); } +static void amd_iommu_report_rmp_hw_error(volatile u32 *event) +{ + struct iommu_dev_data *dev_data = NULL; + int devid, vmg_tag, flags; + struct pci_dev *pdev; + u64 spa; + + devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + vmg_tag = (event[1]) & 0x; + flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + spa = ((u64)event[3] << 32) | (event[2] & 0xFFF8); + + pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + devid & 0xff); + if (pdev) + dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + vmg_tag, spa, flags); + } else { + pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + vmg_tag, spa, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + +static void amd_iommu_report_rmp_fault(volatile u32 *event) +{ + struct iommu_dev_data *dev_data = NULL; + int devid, flags_rmp, vmg_tag, flags; + struct pci_dev *pdev; + u64 gpa; + + devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF; + vmg_tag = (event[1]) & 0x; + flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + gpa = ((u64)event[3] << 32) | event[2]; + + pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + devid & 0xff); + if (pdev) + dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + vmg_tag, gpa, flags_rmp, flags); + } else { + pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + vmg_tag, gpa, flags_rmp, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, u64 address, int flags) { @@ -577,6 +638,12 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; + case EVENT_TYPE_RMP_FAULT: + amd_iommu_report_rmp_fault(event); + break; + case EVENT_TYPE_RMP_HW_ERR: + amd_iommu_report_rmp_hw_error(event); + break; case EVENT_TYPE_INV_PPR_REQ: pasid = PPR_PASID(*((u64 *)__evt)); tag = event[1] & 0x03FF; -- 2.17.1
Re: [PATCH 2/3] iommu: amd: Add support for RMP_PAGE_FAULT and RMP_HW_ERR
On 9/18/20 4:31 PM, Joerg Roedel wrote: Hi Suravee, On Wed, Sep 16, 2020 at 01:55:48PM +, Suravee Suthikulpanit wrote: +static void amd_iommu_report_rmp_hw_error(volatile u32 *event) +{ + struct pci_dev *pdev; + struct iommu_dev_data *dev_data = NULL; + int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + int vmg_tag = (event[1]) & 0x; + int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + u64 spa = ((u64)event[3] << 32) | (event[2] & 0xFFF8); Please write this as: struct iommu_dev_data *dev_data = NULL; int devid, vmg_tag, flags; struct pci_dev *pdev; u64 spa; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; vmg_tag = (event[1]) & 0x; flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; spa = ((u64)event[3] << 32) | (event[2] & 0xFFF8); Same applied the the next function. + + pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + devid & 0xff); + if (pdev) + dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [RMP_HW_ERROR devid=0x%04x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + devid, vmg_tag, spa, flags); Printing the devid is not really needed here, no? Same issue in the next function. I'll update the patch and will send out V2. Thanks, Suravee
[PATCH 08/13] iommu: amd: Remove amd_iommu_domain_get_pgtable
Since the IO page table root and mode parameters have been moved into the struct amd_io_pg, the function is no longer needed. Therefore, remove it along with the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 4 ++-- drivers/iommu/amd/amd_iommu_types.h | 6 - drivers/iommu/amd/io_pgtable.c | 36 ++--- drivers/iommu/amd/iommu.c | 34 --- 4 files changed, 19 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 8dff7d85be79..2059e64fdc53 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -101,6 +101,8 @@ static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } static inline @@ -135,8 +137,6 @@ extern unsigned long iommu_unmap_page(struct protection_domain *dom, extern u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size); -extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, -struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5d53b7bec256..a07af389eae1 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -511,12 +511,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 8ce2f0325123..524c5406ccd6 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -215,30 +215,27 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { - struct domain_pgtable pgtable; unsigned long flags; bool ret = true; u64 *pte; spin_lock_irqsave(&domain->lock, flags); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) goto out; ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); - pgtable.root = pte; - pgtable.mode += 1; + domain->iop.root = pte; + domain->iop.mode += 1; amd_iommu_update_and_flush_device_table(domain); amd_iommu_domain_flush_complete(domain); @@ -246,7 +243,7 @@ static bool increase_address_space(struct protection_domain *domain, * Device Table needs to be updated and flushed before the new root can * be published. */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); ret = true; @@ -263,29 +260,23 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { - struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { /* * Return an error if there is no memory to update the * page-table. */ if (!increase_address_space(domain, address, gfp)) return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); } - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; + level = domain-
[PATCH 13/13] iommu: amd: Adopt IO page table framework
Switch to using IO page table framework for AMD IOMMU v1 page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2e8dc2a1ec0f..046ea81a3b77 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -127,4 +127,7 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); +extern struct io_pgtable_ops * +amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data, + struct protection_domain *pdom); #endif diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 77f44b927ae7..df304d8a630a 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1580,6 +1581,7 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) static int attach_device(struct device *dev, struct protection_domain *domain) { + struct io_pgtable_ops *pgtbl_ops; struct iommu_dev_data *dev_data; struct pci_dev *pdev; unsigned long flags; @@ -1623,6 +1625,12 @@ static int attach_device(struct device *dev, skip_ats_check: ret = 0; + pgtbl_ops = amd_iommu_setup_io_pgtable_ops(dev_data, domain); + if (!pgtbl_ops) { + ret = -ENOMEM; + goto out; + } + do_attach(dev_data, domain); /* @@ -1958,6 +1966,8 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) if (domain->dev_cnt > 0) cleanup_domain(domain); + free_io_pgtable_ops(&domain->iop.iop.ops); + BUG_ON(domain->dev_cnt != 0); if (!dom) -- 2.17.1
[PATCH 12/13] iommu: amd: Introduce iommu_v1_map_page and iommu_v1_unmap_page
These implement map and unmap for AMD IOMMU v1 pagetable, which will be used by the IO pagetable framework. Also clean up unused extern function declarations. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 13 - drivers/iommu/amd/io_pgtable.c | 25 - drivers/iommu/amd/iommu.c | 7 --- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 69996e57fae2..2e8dc2a1ec0f 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -124,19 +124,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -/* TODO: These are temporary and will be removed once fully transition */ -extern int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp); -extern unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size); -extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 2f36bab23516..7f7be302c538 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -348,9 +348,9 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct amd_io_pgtable *pgtable, - unsigned long address, - unsigned long *page_size) +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) { int level; u64 *pte; @@ -423,13 +423,10 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -int iommu_map_page(struct protection_domain *dom, - unsigned long iova, - unsigned long paddr, - unsigned long size, - int prot, - gfp_t gfp) +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); struct page *freelist = NULL; bool updated = false; u64 __pte, *pte; @@ -492,11 +489,11 @@ int iommu_map_page(struct protection_domain *dom, return ret; } -unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long iova, - unsigned long size) +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) { - struct io_pgtable_ops *ops = &dom->iop.iop.ops; struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; @@ -571,6 +568,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct protection_domain *pdom = (struct protection_domain *)cookie; + pdom->iop.iop.ops.map = iommu_v1_map_page; + pdom->iop.iop.ops.unmap= iommu_v1_unmap_page; pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pdom->iop.iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9a1a16031e00..77f44b927ae7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2044,6 +2044,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; int ret; @@ -2055,8 +2056,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - + ret = op
[PATCH 10/13] iommu: amd: Refactor fetch_pte to use struct amd_io_pgtable
To simplify the fetch_pte function. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 13 +++-- drivers/iommu/amd/iommu.c | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2059e64fdc53..69996e57fae2 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,7 +134,7 @@ extern int iommu_map_page(struct protection_domain *dom, extern unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long page_size); -extern u64 *fetch_pte(struct protection_domain *domain, +extern u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 5da5ce98b7b3..f913fc7b1e58 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -348,7 +348,7 @@ static u64 *alloc_pte(struct protection_domain *domain, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -u64 *fetch_pte(struct protection_domain *domain, +u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsigned long address, unsigned long *page_size) { @@ -357,11 +357,11 @@ u64 *fetch_pte(struct protection_domain *domain, *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->iop.mode)) + if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL; - level = domain->iop.mode - 1; - pte= &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + level = pgtable->mode - 1; + pte= &pgtable->root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -496,6 +496,8 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unsigned long iova, unsigned long size) { + struct io_pgtable_ops *ops = &dom->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long long unmapped; unsigned long unmap_size; u64 *pte; @@ -505,8 +507,7 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped = 0; while (unmapped < size) { - pte = fetch_pte(dom, iova, &unmap_size); - + pte = fetch_pte(pgtable, iova, &unmap_size); if (pte) { int i, count; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3f6ede1e572c..87cea1cde414 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2078,13 +2078,15 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); unsigned long offset_mask, pte_pgsize; u64 *pte, __pte; if (domain->iop.mode == PAGE_MODE_NONE) return iova; - pte = fetch_pte(domain, iova, &pte_pgsize); + pte = fetch_pte(pgtable, iova, &pte_pgsize); if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; -- 2.17.1
[PATCH 02/13] iommu: amd: Prepare for generic IO page table framework
Add initial hook up code to implement generic IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/Kconfig | 1 + drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 32 +++ drivers/iommu/amd/io_pgtable.c | 89 + drivers/iommu/amd/iommu.c | 10 drivers/iommu/io-pgtable.c | 3 + include/linux/io-pgtable.h | 2 + 7 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable.c diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index f696ac7c5f89..77cd8d966fbc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Maximum number of IOMMUs supported @@ -252,6 +253,19 @@ #define GA_GUEST_NR0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) @@ -461,6 +475,23 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ +struct protection_domain, iop) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -469,6 +500,7 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ atomic64_t pt_root; /* pgtable root and pgtable mode */ diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index ..452cad26a2b3 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt)pr_fmt(fmt) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_flush_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, +unsigned long iova, size_t granule, +void *cookie) +{ + struct protection_domain *pdom = cookie; + struct iommu_do
[PATCH 07/13] iommu: amd: Restructure code for freeing page table
Introduce amd_iommu_free_pgtable helper function, which consolidates logic for freeing page table. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/io_pgtable.c | 12 +++- drivers/iommu/amd/iommu.c | 19 ++- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index ee7ff4d827e1..8dff7d85be79 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -123,7 +123,6 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif /* TODO: These are temporary and will be removed once fully transition */ -extern void free_pagetable(struct domain_pgtable *pgtable); extern int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, @@ -140,4 +139,5 @@ extern void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable); extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable); #endif diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 7e4154e26757..8ce2f0325123 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -167,14 +167,24 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -void free_pagetable(struct domain_pgtable *pgtable) +void amd_iommu_free_pgtable(struct amd_io_pgtable *pgtable) { + struct protection_domain *dom; struct page *freelist = NULL; unsigned long root; if (pgtable->mode == PAGE_MODE_NONE) return; + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || pgtable->mode > PAGE_MODE_6_LEVEL); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4d65f64236b6..cbbea7b952fb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1882,17 +1882,13 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); kfree(domain); } @@ -2281,22 +2277,11 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - amd_iommu_domain_update(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + amd_iommu_free_pgtable(&domain->iop); spin_unlock_irqrestore(&domain->lock, flags); } -- 2.17.1
[PATCH 04/13] iommu: amd: Convert to using amd_io_pgtable
Make use of the new struct amd_io_pgtable in preparation to remove the struct domain_pgtable. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 25 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index da6e09657e00..22ecacb71675 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -47,6 +47,7 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c8b8619cc744..09da37c4c9c4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -90,8 +90,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); / * @@ -1482,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); domain_flush_complete(domain); /* @@ -1857,17 +1855,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1957,7 +1954,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2263,22 +2260,20 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * */ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } @@ -2288,7 +2283,7 @@ static void update_domain(struct protection_domain *domain) /* Update device table */ amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); -- 2.17.1
[PATCH 03/13] iommu: amd: Move pt_root to to struct amd_io_pgtable
To better organize the data structure since it contains IO page table related information. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 97cdb235ce69..da6e09657e00 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -96,7 +96,7 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->pt_root, root); + atomic64_set(&domain->iop.pt_root, root); } static inline diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 77cd8d966fbc..5d53b7bec256 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -490,6 +490,7 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ }; /* @@ -503,7 +504,6 @@ struct protection_domain { struct amd_io_pgtable iop; spinlock_t lock;/* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx;/* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2b7eb51dcbb8..c8b8619cc744 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -146,7 +146,7 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, struct domain_pgtable *pgtable) { - u64 pt_root = atomic64_read(&domain->pt_root); + u64 pt_root = atomic64_read(&domain->iop.pt_root); pgtable->root = (u64 *)(pt_root & PAGE_MASK); pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -- 2.17.1
[PATCH 05/13] iommu: amd: Declare functions as extern
And move declaration to header file so that they can be included across multiple files. There is no functional change. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/iommu.c | 39 +-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 22ecacb71675..8b7be9171030 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -48,6 +48,9 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, u64 address); extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, unsigned long cr3); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 09da37c4c9c4..f91f35edb7ba 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -88,7 +88,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); / @@ -1294,12 +1293,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1324,7 +1323,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1481,7 +1480,7 @@ static bool increase_address_space(struct protection_domain *domain, pgtable.root = pte; pgtable.mode += 1; amd_iommu_update_and_flush_device_table(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* * Device Table needs to be updated and flushed before the new root can @@ -1734,8 +1733,8 @@ static int iommu_map_page(struct protection_domain *dom, * Updates and flushing already happened in * increase_address_space(). */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1978,10 +1977,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2114,9 +2113,9 @@ static int attach_device(struct device *dev, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2277,7 +2276,7 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { struct domain_pgtable pgtable; @@ -2286,8 +2285,8 @@ static void update_domain(struct protection_domain *domain) amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); -
[PATCH 11/13] iommu: amd: Introduce iommu_v1_iova_to_phys
This implements iova_to_phys for AMD IOMMU v1 pagetable, which will be used by the IO page table framework. Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd/io_pgtable.c | 21 + drivers/iommu/amd/iommu.c | 16 +--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index f913fc7b1e58..2f36bab23516 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -525,6 +525,26 @@ unsigned long iommu_unmap_page(struct protection_domain *dom, return unmapped; } +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + struct io_pgtable_ops *amd_iommu_setup_io_pgtable_ops(struct iommu_dev_data *dev_data, struct protection_domain *domain) { @@ -551,6 +571,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct protection_domain *pdom = (struct protection_domain *)cookie; + pdom->iop.iop.ops.iova_to_phys = iommu_v1_iova_to_phys; return &pdom->iop.iop; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 87cea1cde414..9a1a16031e00 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2079,22 +2079,8 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; - struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - unsigned long offset_mask, pte_pgsize; - u64 *pte, __pte; - if (domain->iop.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(pgtable, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) -- 2.17.1