Hi, On 09/06/2018 10:14 AM, Tian, Kevin wrote:
From: Lu Baolu [mailto:[email protected]] Sent: Thursday, August 30, 2018 9:35 AMIn scalable mode, pasid structure is a two level table with a pasid directory table and a pasid table. Any pasid entry can be identified by a pasid value in below way. 1 9 6 5 0 .-----------------------.-------. | PASID | | '-----------------------'-------' .-------------. | | | | | | | | | | | | | .-----------. | .-------------. | | | |----->| PASID Entry | | | | | '-------------' | | | |Plus | | | .-----------. | | | |---->| DIR Entry |-------->| | | '-----------' '-------------' .---------. |Plus | | | Context | | | | | Entry |------->| | '---------' '-----------' This changes the pasid table APIs to support scalable mode PASID directory and PASID table. It also adds a helper to get the PASID table entry according to the pasid value. Cc: Ashok Raj <[email protected]> Cc: Jacob Pan <[email protected]> Cc: Kevin Tian <[email protected]> Cc: Liu Yi L <[email protected]> Signed-off-by: Sanjay Kumar <[email protected]> Signed-off-by: Lu Baolu <[email protected]> Reviewed-by: Ashok Raj <[email protected]> --- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/intel-pasid.c | 72 ++++++++++++++++++++++++++++++++---- - drivers/iommu/intel-pasid.h | 10 +++++- drivers/iommu/intel-svm.c | 6 +--- 4 files changed, 74 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5845edf4dcf9..b0da4f765274 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2507,7 +2507,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev) dev->archdata.iommu = info; - if (dev && dev_is_pci(dev) && info->pasid_supported) { + if (dev && dev_is_pci(dev) && sm_supported(iommu)) {worthy of a comment here that PASID table now is mandatory in scalable mode, instead of optional for 1st level usage before.
Fair enough. Will add in the next version.
ret = intel_pasid_alloc_table(dev); if (ret) { __dmar_remove_one_dev_info(info); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index fe95c9bd4d33..d6e90cd5b062 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -127,8 +127,7 @@ int intel_pasid_alloc_table(struct device *dev) int ret, order; info = dev->archdata.iommu; - if (WARN_ON(!info || !dev_is_pci(dev) || - !info->pasid_supported || info->pasid_table)) + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL;following same logic should you check sm_supported here?
If not sm_supported, info->pasid_table should be NULL. Checking info->pasid_table is better since even sm_supported, the pasid table pointer could still possible to be empty.
/* DMA alias device already has a pasid table, use it: */ @@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev) return -ENOMEM; INIT_LIST_HEAD(&pasid_table->dev); - size = sizeof(struct pasid_entry); + size = sizeof(struct pasid_dir_entry); count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); + count >>= PASID_PDE_SHIFT; order = get_order(size * count); pages = alloc_pages_node(info->iommu->node, GFP_ATOMIC | __GFP_ZERO, @@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->table = page_address(pages); pasid_table->order = order; - pasid_table->max_pasid = count; + pasid_table->max_pasid = count << PASID_PDE_SHIFT;are you sure of that count is PDE_SHIFT aligned? otherwise >> then << would lose some bits. If sure, then better add some check.
I am making the max_pasid PDE_SHIFT aligned as the result of shift operations.
attach_out: device_attach_pasid_table(info, pasid_table); @@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev) return 0; } +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool +pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT;curious why adding READ_ONCE specifically for PASID structure, but not used for any other existing vtd structures? Is it to address some specific requirement on PASID structure as defined in spec?
READ/WRITE_ONCE are used in pasid entry read/write to prevent the compiler from merging, refetching or reordering successive instances of read/write.
+} + +/* Get PASID table from a PASID directory entry. */ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; + struct pasid_entry *table; + int i, max_pde; info = dev->archdata.iommu; - if (!info || !dev_is_pci(dev) || - !info->pasid_supported || !info->pasid_table) + if (!info || !dev_is_pci(dev) || !info->pasid_table) return; pasid_table = info->pasid_table; @@ -178,6 +197,14 @@ void intel_pasid_free_table(struct device *dev) if (!list_empty(&pasid_table->dev)) return; + /* Free scalable mode PASID directory tables: */ + dir = pasid_table->table; + max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; + for (i = 0; i < max_pde; i++) { + table = get_pasid_table_from_pde(&dir[i]); + free_pgtable_page(table); + } + free_pages((unsigned long)pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -206,17 +233,37 @@ int intel_pasid_get_dev_max_id(struct device *dev) struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) { + struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; struct pasid_entry *entries; + int dir_index, index; pasid_table = intel_pasid_get_table(dev); if (WARN_ON(!pasid_table || pasid < 0 || pasid >= intel_pasid_get_dev_max_id(dev))) return NULL; - entries = pasid_table->table; + dir = pasid_table->table; + info = dev->archdata.iommu; + dir_index = pasid >> PASID_PDE_SHIFT; + index = pasid & PASID_PTE_MASK; + + spin_lock(&pasid_lock); + entries = get_pasid_table_from_pde(&dir[dir_index]); + if (!entries) { + entries = alloc_pgtable_page(info->iommu->node); + if (!entries) { + spin_unlock(&pasid_lock); + return NULL; + } + + WRITE_ONCE(dir[dir_index].val, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + } + spin_unlock(&pasid_lock); - return &entries[pasid]; + return &entries[index]; } /* @@ -224,7 +271,14 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) */ static inline void pasid_clear_entry(struct pasid_entry *pe) { - WRITE_ONCE(pe->val, 0); + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0);memset?
The order is important here. Otherwise, the PRESENT bit of this pasid entry might still set while other fields contains invalid values.
} void intel_pasid_clear_entry(struct device *dev, int pasid) diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 1c05ed6fc5a5..12f480c2bb8b 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -12,11 +12,19 @@ #define PASID_MIN 0x1 #define PASID_MAX 0x100000 +#define PASID_PTE_MASK 0x3F +#define PASID_PTE_PRESENT 1 +#define PDE_PFN_MASK PAGE_MASK +#define PASID_PDE_SHIFT 6 -struct pasid_entry { +struct pasid_dir_entry { u64 val; }; +struct pasid_entry { + u64 val[8]; +}; + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 4a03e5090952..6c0bd9ee9602 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu) order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); if (ecap_dis(iommu->ecap)) { - /* Just making it explicit... */ - BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (pages) iommu->pasid_state_table = page_address(pages); @@ -406,9 +404,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_entry_val |= PASID_ENTRY_FLPM_5LP; entry = intel_pasid_get_entry(dev, svm->pasid); - entry->val = pasid_entry_val; - - wmb(); + WRITE_ONCE(entry->val[0], pasid_entry_val); /* * Flush PASID cache when a PASID table entry becomes -- 2.17.1
Best regards, Lu Baolu _______________________________________________ iommu mailing list [email protected] https://lists.linuxfoundation.org/mailman/listinfo/iommu
