Re: [PATCH v4 05/22] iommu: introduce iommu invalidate API function
On Mon Apr 16 18, Jacob Pan wrote: From: "Liu, Yi L"When an SVM capable device is assigned to a guest, the first level page tables are owned by the guest and the guest PASID table pointer is linked to the device context entry of the physical IOMMU. Host IOMMU driver has no knowledge of caching structure updates unless the guest invalidation activities are passed down to the host. The primary usage is derived from emulated IOMMU in the guest, where QEMU can trap invalidation activities before passing them down to the host/physical IOMMU. Since the invalidation data are obtained from user space and will be written into physical IOMMU, we must allow security check at various layers. Therefore, generic invalidation data format are proposed here, model specific IOMMU drivers need to convert them into their own format. Signed-off-by: Liu, Yi L Signed-off-by: Jean-Philippe Brucker Signed-off-by: Jacob Pan Signed-off-by: Ashok Raj --- drivers/iommu/iommu.c | 14 include/linux/iommu.h | 12 +++ include/uapi/linux/iommu.h | 79 ++ 3 files changed, 105 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3a69620..784e019 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1344,6 +1344,20 @@ void iommu_unbind_pasid_table(struct iommu_domain *domain, struct device *dev) } EXPORT_SYMBOL_GPL(iommu_unbind_pasid_table); +int iommu_sva_invalidate(struct iommu_domain *domain, + struct device *dev, struct tlb_invalidate_info *inv_info) +{ + int ret = 0; + + if (unlikely(!domain->ops->sva_invalidate)) + return -ENODEV; + + ret = domain->ops->sva_invalidate(domain, dev, inv_info); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_sva_invalidate); + static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8ad111f..e963dbd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -190,6 +190,7 @@ struct iommu_resv_region { * @pgsize_bitmap: bitmap of all 
possible supported page sizes * @bind_pasid_table: bind pasid table pointer for guest SVM * @unbind_pasid_table: unbind pasid table pointer and restore defaults + * @sva_invalidate: invalidate translation caches of shared virtual address */ struct iommu_ops { bool (*capable)(enum iommu_cap); @@ -243,6 +244,8 @@ struct iommu_ops { struct pasid_table_config *pasidt_binfo); void (*unbind_pasid_table)(struct iommu_domain *domain, struct device *dev); + int (*sva_invalidate)(struct iommu_domain *domain, + struct device *dev, struct tlb_invalidate_info *inv_info); unsigned long pgsize_bitmap; }; @@ -309,6 +312,9 @@ extern int iommu_bind_pasid_table(struct iommu_domain *domain, struct device *dev, struct pasid_table_config *pasidt_binfo); extern void iommu_unbind_pasid_table(struct iommu_domain *domain, struct device *dev); +extern int iommu_sva_invalidate(struct iommu_domain *domain, + struct device *dev, struct tlb_invalidate_info *inv_info); + extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); @@ -720,6 +726,12 @@ void iommu_unbind_pasid_table(struct iommu_domain *domain, struct device *dev) { } +static inline int iommu_sva_invalidate(struct iommu_domain *domain, + struct device *dev, struct tlb_invalidate_info *inv_info) +{ + return -EINVAL; +} + Would -ENODEV make more sense here? #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 9f7a6bf..4447943 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -29,4 +29,83 @@ struct pasid_table_config { __u8 pasid_bits; }; +/** + * enum iommu_inv_granularity - Generic invalidation granularity + * + * When an invalidation request is sent to IOMMU to flush translation caches, + * it may carry different granularity. These granularity levels are not specific + * to a type of translation cache. 
For an example, PASID selective granularity + * is only applicable to PASID cache invalidation. + * This enum is a collection of granularities for all types of translation + * caches. The idea is to make it easy for IOMMU model specific driver do + * conversion from generic to model specific value. + */ +enum iommu_inv_granularity { + IOMMU_INV_GRANU_DOMAIN = 1, /* all TLBs associated with a domain */ + IOMMU_INV_GRANU_DEVICE, /* caching
Re: [PATCH v2] iommu/amd: Reserve exclusion range in iova-domain
On Fri Mar 29 19, Joerg Roedel wrote: From: Joerg Roedel If a device has an exclusion range specified in the IVRS table, this region needs to be reserved in the iova-domain of that device. This hasn't happened until now and can cause data corruption on data transferred with these devices. Treat exclusion ranges as reserved regions in the iommu-core to fix the problem. Fixes: be2a022c0dd0 ('x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices') Signed-off-by: Joerg Roedel I have a version of this that applies to 4.4 and 4.9 using the older dm_region code if that would be useful for stable. --8<-- diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0ad8b7c78a43..f388458624cf 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3165,11 +3165,14 @@ static void amd_iommu_get_dm_regions(struct device *dev, } region->start = entry->address_start; + region->type = IOMMU_RESV_DIRECT; region->length = entry->address_end - entry->address_start; if (entry->prot & IOMMU_PROT_IR) region->prot |= IOMMU_READ; if (entry->prot & IOMMU_PROT_IW) region->prot |= IOMMU_WRITE; + if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE) + region->type = IOMMU_RESV_RESERVED; list_add_tail(>list, head); } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 94f1bf772ec9..d84041bc77ac 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1495,6 +1495,9 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (e == NULL) return -ENOMEM; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + switch (m->type) { default: kfree(e); @@ -1541,9 +1544,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) + if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 
init_unity_map_range(m); p += m->length; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index b08cf57bf455..31d27eb70565 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -324,6 +324,8 @@ #define IOMMU_PROT_IR 0x01 #define IOMMU_PROT_IW 0x02 +#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE(1 << 2) + /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a070fa39521a..ef4aa2879952 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -351,6 +351,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); + if (entry->type != IOMMU_RESV_DIRECT) + continue; + for (addr = start; addr < end; addr += pg_size) { phys_addr_t phys_addr; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f28dff313b07..15b7378f67f3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -115,18 +115,23 @@ enum iommu_attr { DOMAIN_ATTR_MAX, }; +#define IOMMU_RESV_DIRECT (1 << 0) +#define IOMMU_RESV_RESERVED(1 << 1) + /** * struct iommu_dm_region - descriptor for a direct mapped memory region * @list: Linked list pointers * @start: System physical start address of the region * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) + * @type: Type of region (DIRECT, RESERVED) */ struct iommu_dm_region { struct list_headlist; phys_addr_t start; size_t length; int prot; + int type; }; #ifdef CONFIG_IOMMU_API -- 2.21.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2] iommu/amd: Reserve exclusion range in iova-domain
On Fri Mar 29 19, Joerg Roedel wrote: From: Joerg Roedel If a device has an exclusion range specified in the IVRS table, this region needs to be reserved in the iova-domain of that device. This hasn't happened until now and can cause data corruption on data transfered with these devices. Treat exclusion ranges as reserved regions in the iommu-core to fix the problem. Fixes: be2a022c0dd0 ('x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 9 ++--- drivers/iommu/amd_iommu_init.c | 7 --- drivers/iommu/amd_iommu_types.h | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 21cb088d6687..f7cdd2ab7f11 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3169,21 +3169,24 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_for_each_entry(entry, _iommu_unity_map, list) { + int type, prot = 0; size_t length; - int prot = 0; if (devid < entry->devid_start || devid > entry->devid_end) continue; + type = IOMMU_RESV_DIRECT; length = entry->address_end - entry->address_start; if (entry->prot & IOMMU_PROT_IR) prot |= IOMMU_READ; if (entry->prot & IOMMU_PROT_IW) prot |= IOMMU_WRITE; + if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE) + /* Exclusion range */ + type = IOMMU_RESV_RESERVED; region = iommu_alloc_resv_region(entry->address_start, -length, prot, -IOMMU_RESV_DIRECT); +length, prot, type); if (!region) { dev_err(dev, "Out of memory allocating dm-regions\n"); return; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index f773792d77fd..1b1378619fc9 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2013,6 +2013,9 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (e == NULL) return -ENOMEM; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + switch (m->type) { default: 
kfree(e); @@ -2059,9 +2062,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) + if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) init_unity_map_range(m); p += m->length; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index eae0741f72dc..87965e4d9647 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -374,6 +374,8 @@ #define IOMMU_PROT_IR 0x01 #define IOMMU_PROT_IW 0x02 +#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE(1 << 2) + /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 -- 2.16.4 Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/amd: print reason for iommu_map_page failure in map_sg
Since there are multiple possible failures in iommu_map_page it would be useful to know which case is being hit when the error message is printed in map_sg. While here, fix up checkpatch complaint about using function name in a string instead of __func__. Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar --- drivers/iommu/amd_iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..675f7027aa04 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2562,6 +2562,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; unsigned long address; u64 dma_mask; + int ret; domain = get_domain(dev); if (IS_ERR(domain)) @@ -2584,7 +2585,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for (j = 0; j < pages; ++j) { unsigned long bus_addr, phys_addr; - int ret; bus_addr = address + s->dma_address + (j << PAGE_SHIFT); phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); @@ -2605,8 +2605,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, return nelems; out_unmap: - pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", - dev_name(dev), npages); + pr_err("%s: IOMMU mapping error in %s (io-pages: %d) reason: %d\n", + dev_name(dev), __func__, npages, ret); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); -- 2.20.1.98.gecbdaf0899 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/vt-d: only attempt to clean up svm page request irq if one is assigned
Only try to clean up the svm page request irq if one has been assigned. Also clear pr_irq in the error path if irq request fails. Signed-off-by: Jerry Snitselaar <jsnit...@redhat.com> --- drivers/iommu/intel-svm.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f6697e55c2d4..003b4a4d4b78 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -129,6 +129,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", iommu->name); dmar_free_hwirq(irq); + iommu->pr_irq = 0; goto err; } dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); @@ -144,9 +145,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); - free_irq(iommu->pr_irq, iommu); - dmar_free_hwirq(iommu->pr_irq); - iommu->pr_irq = 0; + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_pages((unsigned long)iommu->prq, PRQ_ORDER); iommu->prq = NULL; -- 2.13.0.rc0.45.ge2cb6ab84 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/vt-d: clean up pr_irq if request_threaded_irq fails
It is unlikely request_threaded_irq will fail, but if it does for some reason we should clear iommu->pr_irq in the error path. Also intel_svm_finish_prq shouldn't try to clean up the page request interrupt if pr_irq is 0. Without these, if request_threaded_irq were to fail the following occurs: fail with no fixes: [0.683147] [ cut here ] [0.683148] NULL pointer, cannot free irq [0.683158] WARNING: CPU: 1 PID: 1 at kernel/irq/irqdomain.c:1632 irq_domain_free_irqs+0x126/0x140 [0.683160] Modules linked in: [0.683163] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2 #3 [0.683165] Hardware name: /NUC7i3BNB, BIOS BNKBL357.86A.0036.2017.0105.1112 01/05/2017 [0.683168] RIP: 0010:irq_domain_free_irqs+0x126/0x140 [0.683169] RSP: :c9037ce8 EFLAGS: 00010292 [0.683171] RAX: 001d RBX: 880276283c00 RCX: 81c5e5e8 [0.683172] RDX: 0001 RSI: 0096 RDI: 0246 [0.683174] RBP: 880276283c00 R08: R09: 023c [0.683175] R10: 0007 R11: R12: 007a [0.683176] R13: 0001 R14: R15: 01001000 [0.683178] FS: () GS:88027ec8() knlGS: [0.683180] CS: 0010 DS: ES: CR0: 80050033 [0.683181] CR2: CR3: 01c09001 CR4: 003606e0 [0.683182] Call Trace: [0.683189] intel_svm_finish_prq+0x3c/0x60 [0.683191] free_dmar_iommu+0x1ac/0x1b0 [0.683195] init_dmars+0xaaa/0xaea [0.683200] ? klist_next+0x19/0xc0 [0.683203] ? pci_do_find_bus+0x50/0x50 [0.683205] ? pci_get_dev_by_id+0x52/0x70 [0.683208] intel_iommu_init+0x498/0x5c7 [0.683211] pci_iommu_init+0x13/0x3c [0.683214] ? e820__memblock_setup+0x61/0x61 [0.683217] do_one_initcall+0x4d/0x1a0 [0.683220] kernel_init_freeable+0x186/0x20e [0.683222] ? set_debug_rodata+0x11/0x11 [0.683225] ? 
rest_init+0xb0/0xb0 [0.683226] kernel_init+0xa/0xff [0.683229] ret_from_fork+0x1f/0x30 [0.683259] Code: 89 ee 44 89 e7 e8 3b e8 ff ff 5b 5d 44 89 e7 44 89 ee 41 5c 41 5d 41 5e e9 a8 84 ff ff 48 c7 c7 a8 71 a7 81 31 c0 e8 6a d3 f9 ff <0f> ff 5b 5d 41 5c 41 5d 41 5 e c3 0f 1f 44 00 00 66 2e 0f 1f 84 [0.683285] ---[ end trace f7650e42792627ca ]--- with iommu->pr_irq = 0, but no check in intel_svm_finish_prq: [0.669561] [ cut here ] [0.669563] Trying to free already-free IRQ 0 [0.669573] WARNING: CPU: 3 PID: 1 at kernel/irq/manage.c:1546 __free_irq+0xa4/0x2c0 [0.669574] Modules linked in: [0.669577] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2 #4 [0.669579] Hardware name: /NUC7i3BNB, BIOS BNKBL357.86A.0036.2017.0105.1112 01/05/2017 [0.669581] RIP: 0010:__free_irq+0xa4/0x2c0 [0.669582] RSP: :c9037cc0 EFLAGS: 00010082 [0.669584] RAX: 0021 RBX: RCX: 81c5e5e8 [0.669585] RDX: 0001 RSI: 0086 RDI: 0046 [0.669587] RBP: R08: R09: 023c [0.669588] R10: 0007 R11: R12: 880276253960 [0.669589] R13: 8802762538a4 R14: 880276253800 R15: 880276283600 [0.669593] FS: () GS:88027ed8() knlGS: [0.669594] CS: 0010 DS: ES: CR0: 80050033 [0.669596] CR2: CR3: 01c09001 CR4: 003606e0 [0.669602] Call Trace: [0.669616] free_irq+0x30/0x60 [0.669620] intel_svm_finish_prq+0x34/0x60 [0.669623] free_dmar_iommu+0x1ac/0x1b0 [0.669627] init_dmars+0xaaa/0xaea [0.669631] ? klist_next+0x19/0xc0 [0.669634] ? pci_do_find_bus+0x50/0x50 [0.669637] ? pci_get_dev_by_id+0x52/0x70 [0.669639] intel_iommu_init+0x498/0x5c7 [0.669642] pci_iommu_init+0x13/0x3c [0.669645] ? e820__memblock_setup+0x61/0x61 [0.669648] do_one_initcall+0x4d/0x1a0 [0.669651] kernel_init_freeable+0x186/0x20e [0.669653] ? set_debug_rodata+0x11/0x11 [0.669656] ? 
rest_init+0xb0/0xb0 [0.669658] kernel_init+0xa/0xff [0.669661] ret_from_fork+0x1f/0x30 [0.669662] Code: 7a 08 75 0e e9 c3 01 00 00 4c 39 7b 08 74 57 48 89 da 48 8b 5a 18 48 85 db 75 ee 89 ee 48 c7 c7 78 67 a7 81 31 c0 e8 4c 37 fa ff <0f> ff 48 8b 34 24 4c 89 ef e 8 0e 4c 68 00 49 8b 46 40 48 8b 80 [0.669688] ---[ end trace 58a470248700f2fc ]--- Cc: Alex Williamson <alex.william...@redhat.com> Cc: Joerg Roedel <j...@8bytes.org> Cc: Ashok Raj <ashok....@intel.com> Signed-off-by: Jerry Snitselaar <jsnit...@redhat.com> --- drivers/iommu/intel-svm.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/dr
Re: source-id verification failures
On Fri Oct 05 18, Raj, Ashok wrote: On Thu, Oct 04, 2018 at 03:07:46PM -0700, Jacob Pan wrote: On Thu, 4 Oct 2018 13:57:24 -0700 Jerry Snitselaar wrote: > On Thu Oct 04 18, Joerg Roedel wrote: > >Hi Jerry, > > > >thanks for the report. > > > >On Tue, Oct 02, 2018 at 10:25:29AM -0700, Jerry Snitselaar wrote: > >> I've been trying to track down a problem where an hp dl380 gen8 > >> with a Cavium QLogic BR-1860 Fabric Adapter is getting source-id > >> verification failures when running dhclient against that > >> interface. This started showing up when I backported the iova > >> deferred flushing patches. So far this has only been seen on this > >> one system, but I'm trying to understand why it appears with the > >> new deferred flushing code. I also see it with both 4.18.10, and > >> 4.19.0-rc6 kernels. Weird.. IRC, these were there to accomodate phantom functions. Thought PCIe allowed 8bit tag, so if the device needs to allow more than 256 outstanding transactions, one could use the extra functions to account for. I assumed Linux didn't enable phantom functions. If that's the case we also need to ensure all the DMA is aliased properly. I'm assuming if interrupts are generated by other aliases we could block them. Is this device one such? 
Cheers, Ashok > >> > >> [35645.282021] bna :24:00.1 ens5f1: link down > >> [35645.298396] bna :24:00.0 ens5f0: link down > >> [35650.313210] DMAR: DRHD: handling fault status reg 2 > >> [35650.332477] DMAR: [INTR-REMAP] Request device [24:00.0] fault > >> index 14 [fault reason 38] Blocked an interrupt request due to > >> source-id verification failure [35655.137667] bna :24:00.0 > >> ens5f0: link up [35657.532454] bna :24:00.1 ens5f1: link up > >> [35664.281563] bna :24:00.1 ens5f1: link down [35664.298103] > >> bna :24:00.0 ens5f0: link down [35669.313568] DMAR: DRHD: > >> handling fault status reg 102 [35669.333198] DMAR: [INTR-REMAP] > >> Request device [24:00.0] fault index 14 [fault reason 38] Blocked > >> an interrupt request due to source-id verification failure > >> [35674.081212] bna :24:00.0 ens5f0: link up [35674.981280] bna > >> :24:00.1 ens5f1: link up > >> > >> > >> Any ideas? > > > >No, not yet. Can you please post the output of lscpi -vvv? > > > >Jacob, can you or someone from your team please also have a look into > >this problem report? > > yep. +Ashok Jerry, Could you also dump the interrupt remapping table with this patchset? https://lkml.org/lkml/2018/9/12/44 Thanks, Sorry, I've been on dad duty the past few days. I should be back working on this tonight or tomorrow. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: source-id verification failures
On Fri Oct 05 18, Jacob Pan wrote: On Thu, 4 Oct 2018 13:57:24 -0700 Jerry Snitselaar wrote: > >On Tue, Oct 02, 2018 at 10:25:29AM -0700, Jerry Snitselaar wrote: >> I've been trying to track down a problem where an hp dl380 gen8 >> with a Cavium QLogic BR-1860 Fabric Adapter is getting source-id >> verification failures when running dhclient against that >> interface. This started showing up when I backported the iova >> deferred flushing patches. So far this has only been seen on this >> one system, but I'm trying to understand why it appears with the >> new deferred flushing code. I also see it with both 4.18.10, and >> 4.19.0-rc6 kernels. >> Hi Jerry, Could you confirm that you see this failure in v4.19-rc6 kernel only without "strict" mode? I don't see a connection between deferred flushing and IR here, AFAIK deferred flush only affects DMA remapping. Also, does the SID failure occur on other devices under the same IOMMU? Thanks, Jacob Confirmed the system doesn't see the problem with intel_iommu=strict. We've only seen SID failures occur for the 2 ports on the brocade device. Another data point is that there is a dl388 gen8 with the same card, and we don't see any problems there. I'd say it is something with this system, but it is odd that the problem starts showing itself when I add those patches. 
# cat ir_translation_struct Remapped Interrupt supported on IOMMU: dmar0 IR table address:42f20 Entry SrcID DstIDVct IRTE_highIRTE_low 2 24:00.0 00020001 21 00042400 000200010021000d 3 24:00.0 00020200 28 00042400 00020228000d 4 24:00.0 00020800 28 00042400 00020828000d 5 24:00.0 0001 ef 00042400 000100ef000d 6 24:00.0 0001 ef 00042400 000100ef000d 7 24:00.0 0001 ef 00042400 000100ef000d 8 24:00.0 0001 ef 00042400 000100ef000d 9 24:00.0 0001 ef 00042400 000100ef000d 1024:00.0 0001 ef 00042400 000100ef000d 1124:00.0 0001 ef 00042400 000100ef000d 1224:00.0 0001 ef 00042400 000100ef000d 1324:00.0 0001 ef 00042400 000100ef000d 1424:00.0 0001 ef 00042400 000100ef000d 1524:00.0 0001 ef 00042400 000100ef000d 1624:00.0 0001 ef 00042400 000100ef000d 1724:00.0 0001 ef 00042400 000100ef000d 1824:00.0 0001 ef 00042400 000100ef000d 1924:00.0 0001 ef 00042400 000100ef000d 2024:00.1 00020004 25 00042401 000200040025000d 2124:00.1 00020001 29 00042401 000200010029000d 2224:00.1 00020004 29 00042401 000200040029000d 2324:00.1 0001 ef 00042401 000100ef000d 2424:00.1 0001 ef 00042401 000100ef000d 2524:00.1 0001 ef 00042401 000100ef000d 2624:00.1 0001 ef 00042401 000100ef000d 2724:00.1 0001 ef 00042401 000100ef000d 2824:00.1 0001 ef 00042401 000100ef000d 2924:00.1 0001 ef 00042401 000100ef000d 3024:00.1 0001 ef 00042401 000100ef000d 3124:00.1 0001 ef 00042401 000100ef000d 3224:00.1 0001 ef 00042401 000100ef000d 3324:00.1 0001 ef 00042401 000100ef000d 3424:00.1 0001 ef 00042401 000100ef000d 3524:00.1 0001 ef 00042401 000100ef000d 3624:00.1 0001 ef 00042401 000100ef000d 3724:00.1 0001 ef 00042401 000100ef000d 3920:04.0 00020010 28 00042020 000200100028000d 4120:04.1 00020040 28 00042021 000200400028000d 4220:04.2 00020100 28 00042022 00020128000d 4320:04.3 00020400 28 00042023 00020428000d 4420:04.4 00020002 28 00042024 000200020028000d 4520:04.5 00020008 28 00042025 000200080028000d 4620:04.6 00020020 28 00042026 000200200028000d 4720:04.7 00020080 28 00042027 000200800028000d Remapped 
Interrupt supported on IOMMU: dmar1 IR table address:42e80 Entry SrcID DstIDVct IRTE_highIRTE_low 0 00:1e.1 00020020 2a 000400f1 00020020002a000d 1 00:1e.1 0001 30 000400f1 0001003d 2 00:1e.1 00020200 2a 000400f1 0002022a000d 7 00:1e.1 00020004 2b 000400f1 00020004002b000d 8 00:1e.1 00020040 2b 000400f1 00020040002b000d 1100:1e.1 00020100 2b 000400f1 0002012b000d 1700:1e.1 00020010 29 000
source-id verification failures
I've been trying to track down a problem where an hp dl380 gen8 with a Cavium QLogic BR-1860 Fabric Adapter is getting source-id verification failures when running dhclient against that interface. This started showing up when I backported the iova deferred flushing patches. So far this has only been seen on this one system, but I'm trying to understand why it appears with the new deferred flushing code. I also see it with both 4.18.10, and 4.19.0-rc6 kernels. [35645.282021] bna :24:00.1 ens5f1: link down [35645.298396] bna :24:00.0 ens5f0: link down [35650.313210] DMAR: DRHD: handling fault status reg 2 [35650.332477] DMAR: [INTR-REMAP] Request device [24:00.0] fault index 14 [fault reason 38] Blocked an interrupt request due to source-id verification failure [35655.137667] bna :24:00.0 ens5f0: link up [35657.532454] bna :24:00.1 ens5f1: link up [35664.281563] bna :24:00.1 ens5f1: link down [35664.298103] bna :24:00.0 ens5f0: link down [35669.313568] DMAR: DRHD: handling fault status reg 102 [35669.333198] DMAR: [INTR-REMAP] Request device [24:00.0] fault index 14 [fault reason 38] Blocked an interrupt request due to source-id verification failure [35674.081212] bna :24:00.0 ens5f0: link up [35674.981280] bna :24:00.1 ens5f1: link up Any ideas? Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu: amd: call free_iova_fast with pfn in map_sg
In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..418df8ff3e50 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2623,7 +2623,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, } out_free_iova: - free_iova_fast(_dom->iovad, address, npages); + free_iova_fast(_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; -- 2.20.1.98.gecbdaf0899 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Question about out_unmap section of map_sg in amd_iommu.c
out_unmap: pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", dev_name(dev), npages); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); for (j = 0; j < pages; ++j) { unsigned long bus_addr; bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); if (--mapped_pages) goto out_free_iova; Is this condition correct? My thought is that this was meant to break out of the loop early if all the mapped pages have been unmapped. So if (--mapped_pages == 0) instead? } } ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/amd: unmap all mapped pages in error path of map_sg
In the error path of map_sg there is an incorrect if condition for breaking out of the loop that searches the scatterlist for mapped pages to unmap. Instead of breaking out of the loop once all the pages that were mapped have been unmapped, it will break out of the loop after it has unmapped 1 page. Fix the condition, so it breaks out of the loop only after all the mapped pages have been unmapped. Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1167ff0416cf..aeeca479f914 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2548,7 +2548,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } } -- 2.20.1.98.gecbdaf0899 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: amd: call free_iova_fast with pfn in map_sg
On Thu Jan 17 19, Jerry Snitselaar wrote: In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..418df8ff3e50 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2623,7 +2623,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, } out_free_iova: - free_iova_fast(_dom->iovad, address, npages); + free_iova_fast(_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; -- 2.20.1.98.gecbdaf0899 I forgot to add the Fixes line. Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 2/6] iommu/amd: Remove amd_iommu_devtable_lock
On Wed Sep 25 19, Joerg Roedel wrote: From: Joerg Roedel The lock is not necessary because the device table does not contain shared state that needs protection. Locking is only needed on an individual entry basis, and that needs to happen on the iommu_dev_data level. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 23 ++- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 042854bbc5bc..37a9c04fc728 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -70,7 +70,6 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) -static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -2080,10 +2079,11 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + unsigned long flags; int ret; /* lock domain */ - spin_lock(>lock); + spin_lock_irqsave(>lock, flags); ret = -EBUSY; if (dev_data->domain != NULL) @@ -2097,7 +2097,7 @@ static int __attach_device(struct iommu_dev_data *dev_data, out_unlock: /* ready */ - spin_unlock(>lock); + spin_unlock_irqrestore(>lock, flags); return ret; } @@ -2181,7 +2181,6 @@ static int attach_device(struct device *dev, { struct pci_dev *pdev; struct iommu_dev_data *dev_data; - unsigned long flags; int ret; dev_data = get_dev_data(dev); @@ -2209,9 +2208,7 @@ static int attach_device(struct device *dev, } skip_ats_check: - spin_lock_irqsave(_iommu_devtable_lock, flags); ret = __attach_device(dev_data, domain); - spin_unlock_irqrestore(_iommu_devtable_lock, flags); /* * We might boot into a crash-kernel here. 
The crashed kernel @@ -2231,14 +2228,15 @@ static int attach_device(struct device *dev, static void __detach_device(struct iommu_dev_data *dev_data) { struct protection_domain *domain; + unsigned long flags; domain = dev_data->domain; - spin_lock(>lock); + spin_lock_irqsave(>lock, flags); do_detach(dev_data); - spin_unlock(>lock); + spin_unlock_irqrestore(>lock, flags); } /* @@ -2248,7 +2246,6 @@ static void detach_device(struct device *dev) { struct protection_domain *domain; struct iommu_dev_data *dev_data; - unsigned long flags; dev_data = get_dev_data(dev); domain = dev_data->domain; @@ -2262,10 +2259,7 @@ static void detach_device(struct device *dev) if (WARN_ON(!dev_data->domain)) return; - /* lock device table */ - spin_lock_irqsave(_iommu_devtable_lock, flags); __detach_device(dev_data); - spin_unlock_irqrestore(_iommu_devtable_lock, flags); if (!dev_is_pci(dev)) return; @@ -2910,9 +2904,6 @@ int __init amd_iommu_init_dma_ops(void) static void cleanup_domain(struct protection_domain *domain) { struct iommu_dev_data *entry; - unsigned long flags; - - spin_lock_irqsave(_iommu_devtable_lock, flags); while (!list_empty(>dev_list)) { entry = list_first_entry(>dev_list, @@ -2920,8 +2911,6 @@ static void cleanup_domain(struct protection_domain *domain) BUG_ON(!entry->domain); __detach_device(entry); } - - spin_unlock_irqrestore(_iommu_devtable_lock, flags); } static void protection_domain_free(struct protection_domain *domain) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/6] iommu/amd: Take domain->lock for complete attach/detach path
On Wed Sep 25 19, Joerg Roedel wrote: From: Joerg Roedel The code-paths before __attach_device() and __detach_device() are called also access and modify domain state, so take the domain lock there too. This allows to get rid of the __detach_device() function. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 65 --- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 37a9c04fc728..2919168577ff 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2079,27 +2079,13 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - unsigned long flags; - int ret; - - /* lock domain */ - spin_lock_irqsave(>lock, flags); - - ret = -EBUSY; if (dev_data->domain != NULL) - goto out_unlock; + return -EBUSY; /* Attach alias group root */ do_attach(dev_data, domain); - ret = 0; - -out_unlock: - - /* ready */ - spin_unlock_irqrestore(>lock, flags); - - return ret; + return 0; } @@ -2181,8 +2167,11 @@ static int attach_device(struct device *dev, { struct pci_dev *pdev; struct iommu_dev_data *dev_data; + unsigned long flags; int ret; + spin_lock_irqsave(>lock, flags); + dev_data = get_dev_data(dev); if (!dev_is_pci(dev)) @@ -2190,12 +2179,13 @@ static int attach_device(struct device *dev, pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { + ret = -EINVAL; if (!dev_data->passthrough) - return -EINVAL; + goto out; if (dev_data->iommu_v2) { if (pdev_iommuv2_enable(pdev) != 0) - return -EINVAL; + goto out; dev_data->ats.enabled = true; dev_data->ats.qdep= pci_ats_queue_depth(pdev); @@ -2219,24 +2209,10 @@ static int attach_device(struct device *dev, domain_flush_complete(domain); - return ret; -} - -/* - * Removes a device from a protection domain (unlocked) - */ -static void __detach_device(struct iommu_dev_data 
*dev_data) -{ - struct protection_domain *domain; - unsigned long flags; - - domain = dev_data->domain; - - spin_lock_irqsave(>lock, flags); - - do_detach(dev_data); - +out: spin_unlock_irqrestore(>lock, flags); + + return ret; } /* @@ -2246,10 +,13 @@ static void detach_device(struct device *dev) { struct protection_domain *domain; struct iommu_dev_data *dev_data; + unsigned long flags; dev_data = get_dev_data(dev); domain = dev_data->domain; + spin_lock_irqsave(>lock, flags); + /* * First check if the device is still attached. It might already * be detached from its domain because the generic @@ -2257,12 +2236,12 @@ static void detach_device(struct device *dev) * our alias handling. */ if (WARN_ON(!dev_data->domain)) - return; + goto out; - __detach_device(dev_data); + do_detach(dev_data); if (!dev_is_pci(dev)) - return; + goto out; if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); @@ -2270,6 +2249,9 @@ static void detach_device(struct device *dev) pci_disable_ats(to_pci_dev(dev)); dev_data->ats.enabled = false; + +out: + spin_unlock_irqrestore(>lock, flags); } static int amd_iommu_add_device(struct device *dev) @@ -2904,13 +2886,18 @@ int __init amd_iommu_init_dma_ops(void) static void cleanup_domain(struct protection_domain *domain) { struct iommu_dev_data *entry; + unsigned long flags; + + spin_lock_irqsave(>lock, flags); while (!list_empty(>dev_list)) { entry = list_first_entry(>dev_list, struct iommu_dev_data, list); BUG_ON(!entry->domain); - __detach_device(entry); + do_detach(entry); } + + spin_unlock_irqrestore(>lock, flags); } static void protection_domain_free(struct protection_domain *domain) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 5/6] iommu/amd: Lock dev_data in attach/detach code paths
On Wed Sep 25 19, Joerg Roedel wrote: From: Joerg Roedel Make sure that attaching and detaching a device can't race against each other and protect the iommu_dev_data with a spin_lock in these code paths. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 9 + drivers/iommu/amd_iommu_types.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 459247c32dc0..bac4e20a5919 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -201,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + spin_lock_init(&dev_data->lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -2157,6 +2158,8 @@ static int attach_device(struct device *dev, dev_data = get_dev_data(dev); + spin_lock(&dev_data->lock); + ret = -EBUSY; if (dev_data->domain != NULL) goto out; @@ -2199,6 +2202,8 @@ static int attach_device(struct device *dev, domain_flush_complete(domain); out: + spin_unlock(&dev_data->lock); + spin_unlock_irqrestore(&domain->lock, flags); return ret; @@ -2218,6 +2223,8 @@ static void detach_device(struct device *dev) spin_lock_irqsave(&domain->lock, flags); + spin_lock(&dev_data->lock); + /* * First check if the device is still attached. 
It might already * be detached from its domain because the generic @@ -2240,6 +2247,8 @@ static void detach_device(struct device *dev) dev_data->ats.enabled = false; out: + spin_unlock(_data->lock); + spin_unlock_irqrestore(>lock, flags); } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0186501ab971..c9c1612d52e0 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -633,6 +633,9 @@ struct devid_map { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { + /*Protect against attach/detach races */ + spinlock_t lock; + struct list_head list;/* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/6] iommu/amd: Remove domain->updated
(struct protection_domain *domain) { - if (!domain->updated) - return; - update_device_table(domain); domain_flush_devices(domain); domain_flush_tlb_pde(domain); - - domain->updated = false; } static int dir2prot(enum dma_data_direction direction) @@ -,7 +3336,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) /* Update data structure */ domain->mode= PAGE_MODE_NONE; - domain->updated = true; /* Make changes visible to IOMMUs */ update_domain(domain); @@ -3379,7 +3381,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - domain->updated = true; update_domain(domain); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 9ac229e92b07..0186501ab971 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -475,7 +475,6 @@ struct protection_domain { int glx;/* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags;/* flags to find out type of domain */ - bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/6] iommu/amd: Check for busy devices earlier in attach_device()
On Wed Sep 25 19, Joerg Roedel wrote: From: Joerg Roedel Check early in attach_device whether the device is already attached to a domain. This also simplifies the code path so that __attach_device() can be removed. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 25 +++-- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2919168577ff..459247c32dc0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2072,23 +2072,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -/* - * If a device is not yet associated with a domain, this function makes the - * device visible in the domain - */ -static int __attach_device(struct iommu_dev_data *dev_data, - struct protection_domain *domain) -{ - if (dev_data->domain != NULL) - return -EBUSY; - - /* Attach alias group root */ - do_attach(dev_data, domain); - - return 0; -} - - static void pdev_iommuv2_disable(struct pci_dev *pdev) { pci_disable_ats(pdev); @@ -2174,6 +2157,10 @@ static int attach_device(struct device *dev, dev_data = get_dev_data(dev); + ret = -EBUSY; + if (dev_data->domain != NULL) + goto out; + if (!dev_is_pci(dev)) goto skip_ats_check; @@ -2198,7 +2185,9 @@ static int attach_device(struct device *dev, } skip_ats_check: - ret = __attach_device(dev_data, domain); + ret = 0; + + do_attach(dev_data, domain); /* * We might boot into a crash-kernel here. The crashed kernel -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 6/6] iommu/amd: Lock code paths traversing protection_domain->dev_list
On Wed Sep 25 19, Joerg Roedel wrote: From: Joerg Roedel The traversing of this list requires protection_domain->lock to be taken to avoid nasty races with attach/detach code. Make sure the lock is held on all code-paths traversing this list. Reported-by: Filippo Sironi Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bac4e20a5919..9c26976a0f99 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1334,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain, dma_addr_t iova, size_t size) { if (unlikely(amd_iommu_np_cache)) { + unsigned long flags; + + spin_lock_irqsave(>lock, flags); domain_flush_pages(domain, iova, size); domain_flush_complete(domain); + spin_unlock_irqrestore(>lock, flags); } } @@ -1700,8 +1704,13 @@ static int iommu_map_page(struct protection_domain *dom, ret = 0; out: - if (updated) + if (updated) { + unsigned long flags; + + spin_lock_irqsave(>lock, flags); update_domain(dom); + spin_unlock_irqrestore(>lock, flags); + } /* Everything flushed out, free pages now */ free_page_list(freelist); @@ -1857,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain) static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { + unsigned long flags; + + spin_lock_irqsave(>domain.lock, flags); domain_flush_tlb(>domain); domain_flush_complete(>domain); + spin_unlock_irqrestore(>domain.lock, flags); } static void iova_domain_flush_tlb(struct iova_domain *iovad) @@ -2414,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev, { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start, ret; + unsigned long flags; unsigned int pages; int prot = 0; int i; @@ -2451,8 +2465,10 @@ static dma_addr_t __map_single(struct device *dev, iommu_unmap_page(_dom->domain, start, PAGE_SIZE); } 
+ spin_lock_irqsave(_dom->domain.lock, flags); domain_flush_tlb(_dom->domain); domain_flush_complete(_dom->domain); + spin_unlock_irqrestore(_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, address, pages); @@ -2481,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, } if (amd_iommu_unmap_flush) { + unsigned long flags; + + spin_lock_irqsave(_dom->domain.lock, flags); domain_flush_tlb(_dom->domain); domain_flush_complete(_dom->domain); + spin_unlock_irqrestore(_dom->domain.lock, flags); dma_ops_free_iova(dma_dom, dma_addr, pages); } else { pages = __roundup_pow_of_two(pages); @@ -3246,9 +3266,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct protection_domain *dom = to_pdomain(domain); + unsigned long flags; + spin_lock_irqsave(>lock, flags); domain_flush_tlb_pde(dom); domain_flush_complete(dom); + spin_unlock_irqrestore(>lock, flags); } static void amd_iommu_iotlb_sync(struct iommu_domain *domain, -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/6] iommu/amd: Locking Fixes
On Wed Sep 25 19, Joerg Roedel wrote: Hi, here are a couple of fixes for the amd iommu driver to fix a few locking issues around protection-domains. Main problem was that some traversals of ->dev_list were not locked in any form, causing potential race conditions. But there are more issues fixed here, for example the racy access to protection_domain->updated and races in the attach/detach_device code paths. Changes are boot-tested with lockdep enabled, looked all good so far. Please review. Thanks, Joerg Joerg Roedel (6): iommu/amd: Remove domain->updated iommu/amd: Remove amd_iommu_devtable_lock iommu/amd: Take domain->lock for complete attach/detach path iommu/amd: Check for busy devices earlier in attach_device() iommu/amd: Lock dev_data in attach/detach code paths iommu/amd: Lock code paths traversing protection_domain->dev_list drivers/iommu/amd_iommu.c | 166 drivers/iommu/amd_iommu_types.h | 4 +- 2 files changed, 85 insertions(+), 85 deletions(-) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Hi Joerg, What branch is this on top of in your repo? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Tue Dec 03 19, Lu Baolu wrote: Hi, On 12/3/19 12:13 AM, Jerry Snitselaar wrote: On Mon Dec 02 19, Jerry Snitselaar wrote: On Mon Dec 02 19, Lu Baolu wrote: Hi, On 12/2/19 2:34 PM, Jerry Snitselaar wrote: We are seeing DMAR PTE read access not set errors when booting a kernel with default passthrough, both with a test kernel and with a 5.4.0 kernel. Previously we would see a number of identity mappings being set related to the rmrrs, and now they aren't seen and we get the dmar pte errors as devices touch those regions. From what I can tell currently df4f3c603aeb ("iommu/vt-d: Remove static identity map code") removed the bit of code in init_dmars that used to set up those mappings: - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } si_domain_init now has code that sets identity maps for devices in rmrrs, but only for certain devices. On which device, are you seeing this error? Is it a rmrr locked device? Best regards, baolu Almost all of the messages are for the ilo, but there also is a message for the smart array raid bus controller. Also seeing it with a dl380 gen9 system, where the raid bus controller is getting the error. Does it help if you remove if (device_is_rmrr_locked(dev)) continue; in si_domain_init()? Unfortunately it still spits out a bunch of error messages. 
Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Thu Dec 05 19, Lu Baolu wrote: Hi, On 12/5/19 4:53 AM, Jerry Snitselaar wrote: Attaching console output (can't get to a point to actually log in) and config that is used to build that kernel. [...] [ 21.969477] pci :00:00.0: Adding to iommu group 0 [ 21.971390] pci :00:01.0: Adding to iommu group 1 [ 21.973173] pci :00:01.1: Adding to iommu group 2 [ 21.974930] pci :00:02.0: Adding to iommu group 3 [ 21.976672] pci :00:02.1: Adding to iommu group 4 [ 21.978446] pci :00:02.2: Adding to iommu group 5 [ 21.980224] pci :00:02.3: Adding to iommu group 6 [ 21.982096] pci :00:03.0: Adding to iommu group 7 [ 21.983868] pci :00:03.1: Adding to iommu group 8 [ 21.985644] pci :00:03.2: Adding to iommu group 9 [ 21.987484] pci :00:03.3: Adding to iommu group 10 [ 21.989830] pci :00:04.0: Adding to iommu group 11 [ 21.991738] pci :00:04.1: Adding to iommu group 11 [ 21.993557] pci :00:04.2: Adding to iommu group 11 [ 21.995360] pci :00:04.3: Adding to iommu group 11 [ 21.997145] pci :00:04.4: Adding to iommu group 11 [ 21.998915] pci :00:04.5: Adding to iommu group 11 [ 22.000694] pci :00:04.6: Adding to iommu group 11 [ 22.002569] pci :00:04.7: Adding to iommu group 11 [ 22.004556] pci :00:05.0: Adding to iommu group 12 [ 22.006388] pci :00:05.2: Adding to iommu group 12 [ 22.008186] pci :00:05.4: Adding to iommu group 12 [ 22.009968] pci :00:11.0: Adding to iommu group 13 [ 22.011815] pci :00:1a.0: Adding to iommu group 14 [ 22.013605] pci :00:1c.0: Adding to iommu group 15 [ 22.015408] pci :00:1c.7: Adding to iommu group 16 [ 22.017216] pci :00:1d.0: Adding to iommu group 17 [ 22.018991] pci :00:1e.0: Adding to iommu group 18 [ 22.021826] pci :00:1e.0: Using iommu dma mapping [ 22.023783] pci :00:1f.0: Adding to iommu group 19 [ 22.025667] pci :00:1f.2: Adding to iommu group 19 [ 22.346001] pci :03:00.0: Adding to iommu group 20 [ 22.348727] pci :03:00.0: Using iommu dma mapping [ 22.350644] pci :03:00.1: Adding to iommu group 20 [ 22.352833] pci :03:00.2: Adding to iommu 
group 20 [...] It seems that iommu pci bus probe didn't enumerate device [01:00.2] and [02:00.0], the corresponding context entries were not setup. Hence dma fault generated when devices access the memory. Do these two devices show in "lspci" output? How do these devices get enumerated by the system? Best regards, baolu They are there in the output, but it seems out of order: [ 22.025667] pci :00:1f.2: Adding to iommu group 19 [ 22.028569] pci :00:1f.2: DMAR: Setting identity map- 0xe8fff] [ 22.331183] pci :00:1f.2: DMAR: Setting identity map [0xf4000 - 0xf4fff] [ 22.333546] pci :00:1f.2: DMAR: Setting identity map [0xbdf6e000 - 0xbdf6efff] [ 22.336099] pci :00:1f.2: DMAR: Setting identity map [0xbdf6f000 - 0xbdf7efff] [ 22.338604] pci :00:1f.2: DMAR: Setting identity map [0xbdf7f000 - 0xbdf82fff] [ 22.341189] pci :00:1f.2: DMAR: Setting identity map [0xbdf83000 - 0xbdf84fff] [ 22.343700] pci :00:1f.2: DMAR: Device uses a private dma domain. [ 22.346001] pci :03:00.0: Adding to iommu group 20 [ 22.348727] pci :03:00.0: Using iommu dma mapping [ 22.350644] pci :03:00.1: Adding to iommu group 20 [ 22.352833] pci :03:00.2: Adding to iommu group 20 [ 22.354619] pci :03:00.3: Adding to iommu group 20 [ 22.356423] pci :02:00.0: Adding to iommu group 21 [ 22.358999] pci :02:00.0: Using iommu dma mapping [ 22.360785] pci :04:00.0: Adding to iommu group 22 [ 22.362623] pci :05:02.0: Adding to iommu group 23 [ 22.364412] pci :05:04.0: Adding to iommu group 24 [ 22.366172] pci :06:00.0: Adding to iommu group 23 [ 22.368762] pci :06:00.0: DMAR: Setting identity map [0xe8000 - 0xe8fff] [ 22.371290] pci :06:00.0: DMAR: Setting identity map [0xf4000 - 0xf4fff] [ 22.373646] pci :06:00.0: DMAR: Setting ide000 - 0xbdf6efff] [ 22.876042] pci :06:00.0: DMAR: Setting identity map [0xbdf6f000 - 0xbdf7efff] [ 22.878572] pci :06:00.0: DMAR: Setting identity map [0xbdf7f000 - 0xbdf82fff] [ 22.881167] pci :06:00.0: DMAR: Setting identity map [0xbdf83000 - 0xbdf84fff] [ 22.883729] pci 
:06:00.0: DMAR: Device uses a private dma domain. [ 22.885899] pci :06:00.1: Adding to iommu group 23 [ 22.888675] pci :06:00.1: DMAR: Setting identity map [0xe8000 - 0xe8fff] [ 22.891216] pci :06:00.1: DMAR: Setting identity map [0xf4000 - 0xf4fff] [ 22.893576] pci :06:00.1: DMAR: Setting identity map [0xbdf6e000 - 0xbdf6efff] [ 22.896119] pci :06:00.1: DMAR: Setting identity map [0xbdf6f000 - 0xbdf7efff] [ 22.898620] pci :06:00.1: DMAR: Setting identity map [0xbdf7f000 - 0xbdf82fff] [ 22.901232] pci :06:00.1: DMAR: Setting identity map [0xbdf83000 -
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Thu Dec 05 19, Lu Baolu wrote: Hi, On 12/5/19 10:25 AM, Jerry Snitselaar wrote: It seems that iommu pci bus probe didn't enumerate device [01:00.2] and [02:00.0], the corresponding context entries were not setup. Hence dma fault generated when devices access the memory. Do these two devices show in "lspci" output? How do these devices get enumerated by the system? Best regards, baolu They are there in the output, but it seems out of order: [ 23.446201] pci :01:00.0: Adding to iommu group 25 [ 23.448949] pci :01:00.0: Using iommu dma mapping [ 23.450807] pci :01:00.1: Adding to iommu group 25 [ 23.452666] pci :01:00.1: DMAR: Device uses a private identity domain. [ 23.455063] pci :01:00.2: Adding to iommu group 25 [ 23.456881] pci :01:00.4: Adding to iommu group 25 [ 23.458693] pci :01:00.4: DMAR: Device uses a private identity domain. Oh, yes! So device 01:00.0 01:00.1 01:00.2 01:00.4 share a single group. The default domain for this group has been set to DMA although iommu=pt has been set. As the result, .0 .2 use DMA, but .1, .4 use IDENTITY. This is not a valid configuration since all devices in a group should use a same domain. Do you mind posting the "lspci -vvv" output of these devices? I want to figure out why these devices request different domain type. Best regards, baolu 01:00.0 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support (rev 05) Subsystem: Hewlett-Packard Company iLO4 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- TAbort- SERR- TAbort- SERR- TAbort- SERR- TAbort- SERR- https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Sat Dec 07 19, Lu Baolu wrote: Hi Jerry, On 12/6/19 3:24 PM, Jerry Snitselaar wrote: On Fri Dec 06 19, Lu Baolu wrote: [snip] Can you please try below change? Let's check whether the afending address has been mapped for device 01.00.2. $ git diff diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..d9daf66be849 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -663,6 +663,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, ret = iommu_map(domain, addr, addr, pg_size, entry->prot); if (ret) goto out; + + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx] for group %d\n", addr, addr + pg_size, group->id); } } I am doubting that device 01.00.2 is not in the device scope of [ 4.485108] DMAR: RMRR base: 0x00bdf6f000 end: 0x00bdf7efff By the way, does device 01.00.2 works well after binding the driver? When I boot it with passthrough it doesn't get to a point where I can login. I think the serial console on these systems is tied to the ilo, so the conserver connection could be making things worse. Unfortunately the system is remote. I should have more time now to focus on debugging this. Attaching console output for the above patch. It seems that device 01.00.2 isn't in the scope of RMRR [base: 0x00bdf6f000 end: 0x00bdf7efff]. But it still tries to access the address within it, hence faults generated. You can check it with ACPI/DMAR table. Best regards, baolu I believe it is the 3rd endpoint device entry in dmar data below. So question about request_default_domain_for_dev. Since a dma mapping is already done for 1.00.0, and that sets the default_domain for the group (I think), won't it bail out for 1.00.2 at this check? 
if (group->default_domain && group->default_domain->type == type) goto out; output from lspci -t: \-[:00]-+-00.0 +-01.0-[08]-- +-01.1-[14]-- +-02.0-[03]--+-00.0 |+-00.1 |+-00.2 |\-00.3 +-02.1-[15]-- +-02.2-[02]00.0 +-02.3-[16]-- +-03.0-[04-07]00.0-[05-07]--+-02.0-[06]--+-00.0 | |\-00.1 | \-04.0-[07]--+-00.0 |\-00.1 +-03.1-[17]-- +-03.2-[18]-- +-03.3-[19]-- +-04.0 +-04.1 +-04.2 +-04.3 +-04.4 +-04.5 +-04.6 +-04.7 +-05.0 +-05.2 +-05.4 +-11.0-[1b]-- +-1a.0 +-1c.0-[0b]-- +-1c.7-[01]--+-00.0 |+-00.1 |+-00.2 |\-00.4 +-1d.0 +-1e.0-[1a]-- +-1f.0 \-1f.2 DMAR table entries for the RMRR: [302h 0770 2]Subtable Type : 0001 [Reserved Memory Region] [304h 0772 2] Length : 00CE [306h 0774 2] Reserved : [308h 0776 2] PCI Segment Number : [30Ah 0778 8] Base Address : BDF6F000 [312h 0786 8] End Address (limit) : BDF7EFFF [31Ah 0794 1]Device Scope Type : 01 [PCI Endpoint Device] [31Bh 0795 1] Entry Length : 0A [31Ch 0796 2] Reserved : [31Eh 0798 1] Enumeration ID : 00 [31Fh 0799 1] PCI Bus Number : 00 [320h 0800 2] PCI Path : 02,02 [322h 0802 2] PCI Path : 00,00 [324h 0804 1]Device Scope Type : 01 [PCI Endpoint Device] [325h 0805 1] Entry Length : 0A [326h 0806 2] Reserved : [328h 0808 1] Enumeration ID : 00 [329h 0809 1] PCI Bus Number : 00 [32Ah 0810 2] PCI Path : 1C,07 [32Ch 0812 2] PCI Path : 00,00 [32Eh 0814 1]Device Scope Type : 01 [PCI Endpoint Device] [32Fh 0815 1] Entry Length : 0A [330h 0816 2] Reserved : [332h 0818 1] Enumeration ID : 00 [333h 0819 1] PCI Bus Number : 00 [334h 0820 2] PCI Path : 1C,07 [336h 0822 2] PCI Path : 00,02 [338h 0824 1]Device Scope Type : 01 [PCI Endpoint Device] [339h 0825 1] Entry Length : 08 [33Ah 0826 2] Reserved : [33Ch 0828 1] Enumeration ID : 00 [33Dh 0829 1] PC
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Fri Dec 06 19, Jerry Snitselaar wrote: On Sat Dec 07 19, Lu Baolu wrote: Hi Jerry, On 12/6/19 3:24 PM, Jerry Snitselaar wrote: On Fri Dec 06 19, Lu Baolu wrote: [snip] Can you please try below change? Let's check whether the afending address has been mapped for device 01.00.2. $ git diff diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..d9daf66be849 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -663,6 +663,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, ret = iommu_map(domain, addr, addr, pg_size, entry->prot); if (ret) goto out; + + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx] for group %d\n", addr, addr + pg_size, group->id); } } I am doubting that device 01.00.2 is not in the device scope of [ 4.485108] DMAR: RMRR base: 0x00bdf6f000 end: 0x00bdf7efff By the way, does device 01.00.2 works well after binding the driver? When I boot it with passthrough it doesn't get to a point where I can login. I think the serial console on these systems is tied to the ilo, so the conserver connection could be making things worse. Unfortunately the system is remote. I should have more time now to focus on debugging this. Attaching console output for the above patch. It seems that device 01.00.2 isn't in the scope of RMRR [base: 0x00bdf6f000 end: 0x00bdf7efff]. But it still tries to access the address within it, hence faults generated. You can check it with ACPI/DMAR table. Best regards, baolu I believe it is the 3rd endpoint device entry in dmar data below. So question about request_default_domain_for_dev. Since a dma mapping is already done for 1.00.0, and that sets the default_domain for the group (I think), won't it bail out for 1.00.2 at this check? if (group->default_domain && group->default_domain->type == type) goto out; Or I guess request_default_domain_for_dev wouldn't even be called for 1.00.2. 
intel_iommu_add_device it wouldn't even call one of the request functions with 1.00.2 since domain->type would be dma from 1.00.0, and device_def_domain_type should return dma. output from lspci -t: \-[:00]-+-00.0 +-01.0-[08]-- +-01.1-[14]-- +-02.0-[03]--+-00.0 |+-00.1 |+-00.2 |\-00.3 +-02.1-[15]-- +-02.2-[02]00.0 +-02.3-[16]-- +-03.0-[04-07]00.0-[05-07]--+-02.0-[06]--+-00.0 | |\-00.1 | \-04.0-[07]--+-00.0 |\-00.1 +-03.1-[17]-- +-03.2-[18]-- +-03.3-[19]-- +-04.0 +-04.1 +-04.2 +-04.3 +-04.4 +-04.5 +-04.6 +-04.7 +-05.0 +-05.2 +-05.4 +-11.0-[1b]-- +-1a.0 +-1c.0-[0b]-- +-1c.7-[01]--+-00.0 |+-00.1 |+-00.2 |\-00.4 +-1d.0 +-1e.0-[1a]-- +-1f.0 \-1f.2 DMAR table entries for the RMRR: [302h 0770 2]Subtable Type : 0001 [Reserved Memory Region] [304h 0772 2] Length : 00CE [306h 0774 2] Reserved : [308h 0776 2] PCI Segment Number : [30Ah 0778 8] Base Address : BDF6F000 [312h 0786 8] End Address (limit) : BDF7EFFF [31Ah 0794 1]Device Scope Type : 01 [PCI Endpoint Device] [31Bh 0795 1] Entry Length : 0A [31Ch 0796 2] Reserved : [31Eh 0798 1] Enumeration ID : 00 [31Fh 0799 1] PCI Bus Number : 00 [320h 0800 2] PCI Path : 02,02 [322h 0802 2] PCI Path : 00,00 [324h 0804 1]Device Scope Type : 01 [PCI Endpoint Device] [325h 0805 1] Entry Length : 0A [326h 0806 2] Reserved : [328h 0808 1] Enumeration ID : 00 [329h 0809 1] PCI Bus Number : 00 [32Ah 0810 2] PCI Path : 1C,07 [32Ch 0812 2] PCI Path : 00,00 [32Eh 0814 1]Device Scope Type : 01 [PCI Endpoint Device] [32Fh 0815 1] Entry Length : 0A [330h 0816 2] Reserved : [332h 0818 1] Enumeration ID : 00 [333h 0819 1] PCI Bus Number : 00 [334h 0820 2] PCI Path : 1C,07 [336h 0822 2]
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Tue Dec 10 19, Lu Baolu wrote: Hi, On 12/10/19 1:18 PM, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: [snip] A call to iommu_map is failing. [ 36.686881] pci :01:00.2: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 36.689843] pci :01:00.2: iommu_group_create_direct_mappings: iterating through mappings [ 36.692757] pci :01:00.2: iommu_group_create_direct_mappings: calling apply_resv_region [ 36.695526] pci :01:00.2: e_direct_mappings: entry type is direct [ 37.198053] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.201357] pci :01:00.2: iommu_group_create_direct_mappings: iommu_map failed [ 37.203973] pci :01:00.2: iommu_group_create_direct_mappings: leaving func [ 37.206385] pci :01:00.2: iommu_group_add_device: calling __iommu_attach_device [ 37.208950] pci :01:00.2: Adding to iommu group 25 [ 37.210660] pci :01:00.2: DMAR: domain->type is dma It bails at the dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN check at the beginning of intel_iommu_map. I will verify, but it looks like that is getting set when intel_iommu_add_device is called for 01:00.1. request_default_domain_for_dev for 01:00.1 will return -EBUSY because iommu_group_device_count(group) != 1. Okay, I will send you a fix patch later. Thanks! Best regards, baolu One issue I see is: [ 38.869182] uhci_hcd :01:00.4: UHCI Host Controller [ 39.371173] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 39.373708] uhci_hcd :01:00.4: detected 8 ports [ 39.375333] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 39.377820] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 39.379921] uhci_hcd :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 39.382269] uhci_hcd :01:00.4: unable to allocate consistent memory for frame list [ 39.384920] uhci_hcd :01:00.4: startup error -16 [ 39.386619] uhci_hcd :01:00.4: USB bus 3 deregistered [ 39.388640] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 39.390616] uhci_hcd: probe of :01:00.4 failed with error -16 I'm not sure if this is related to the flag and what is allowed now by the api. I need to go look at the code to see what it is doing. I'll try debugging it tonight. Regards, Jerry Also fails for 01:00.4: [ 37.212448] pci :01:00.4: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 37.215382] pci :01:00.4: iommu_group_create_direct_mappings: iterating through mappings [ 37.218170] pci :01:00.4: iommu_group_create_direct_mappings: calling apply_resv_region [ 37.220933] pci :01:00.4: iommu_group_create_direct_mappings: entry type is direct-relaxable [ 37.223932] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.226857] pci :01:00.4: iommu_group_create_direct_mappings: iommu_map failed [ 37.229300] pci :01:00.4: iommu_group_create_direct_mappings: leaving func [ 37.231648] pci :01:00.4: iommu_group_add_device: calling __iommu_attach_device [ 37.234194] pci :01:00.4: Adding to iommu group 25 [ 37.236192] pci :01:00.4: DMAR: domain->type is dma [ 37.237958] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain [ 37.241061] pci :01:00.4: don't change mappings of existing d37.489870] pci :01:00.4: DMAR: Device uses a private identity domain. 
There is an RMRR for 0xbddde000-0xefff: [63Ah 1594 2] Subtable Type : 0001 [Reserved Memory Region] [63Ch 1596 2] Length : 0036 [63Eh 1598 2] Reserved : [640h 1600 2] PCI Segment Number : [642h 1602 8] Base Address : BDDDE000 [64Ah 1610 8] End Address (limit) : BDDDEFFF [652h 1618 1] Device Scope Type : 01 [PCI Endpoint Device] [653h 1619 1] Entry Length : 0A [654h 1620 2] Reserved : [656h 1622 1] Enumeration ID : 00 [657h 1623 1] PCI Bus Number : 00 [658h 1624 2] PCI Path : 1C,07 [65Ah 1626 2] PCI Path : 00,00 [65Ch 1628 1] Device Scope Type : 01 [PCI Endpoint Device] [65Dh 1629 1] Entry Length : 0A [65Eh 1630 2] Reserved : [660h 1632 1] Enumeration ID : 00 [661h 1633 1] PCI Bus Number : 00 [662h 1634 2] PCI Path : 1C,07 [664h 1636 2] PCI Path : 00,02 [666h 1638 1] Device Scope Type : 01 [PCI Endpoint Device] [667h 1639 1] Entry Length : 0A [668h 1640 2] Reserved : [66Ah 1642 1] Enumeration ID : 00 [66Bh 1643 1]
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Thu Dec 12 19, Lu Baolu wrote: Hi, On 12/12/19 9:49 AM, Jerry Snitselaar wrote: On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Can you please try this fix and check whether it can fix your problem? If it helps, do you mind adding a Tested-by? Best regards, baolu Tested-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..b73bebea9148 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5478,9 +5478,6 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return -EINVAL; - if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5523,8 +5520,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. 
*/ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, )); - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5556,9 +5551,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, ); if (pte) phys = dma_pte_addr(pte); -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/vt-d: Set ISA bridge reserved region as relaxable
On Wed Dec 11 19, Alex Williamson wrote: Commit d850c2ee5fe2 ("iommu/vt-d: Expose ISA direct mapping region via iommu_get_resv_regions") created a direct-mapped reserved memory region in order to replace the static identity mapping of the ISA address space, where the latter was then removed in commit df4f3c603aeb ("iommu/vt-d: Remove static identity map code"). According to the history of this code and the Kconfig option surrounding it, this direct mapping exists for the benefit of legacy ISA drivers that are not compatible with the DMA API. In conjunction with commit 9b77e5c79840 ("vfio/type1: check dma map request is within a valid iova range") this change introduced a regression where the vfio IOMMU backend enforces reserved memory regions per IOMMU group, preventing userspace from creating IOMMU mappings conflicting with prescribed reserved regions. A necessary prerequisite for the vfio change was the introduction of "relaxable" direct mappings introduced by commit adfd37382090 ("iommu: Introduce IOMMU_RESV_DIRECT_RELAXABLE reserved memory regions"). These relaxable direct mappings provide the same identity mapping support in the default domain, but also indicate that the reservation is software imposed and may be relaxed under some conditions, such as device assignment. Convert the ISA bridge direct-mapped reserved region to relaxable to reflect that the restriction is self imposed and need not be enforced by drivers such as vfio. 
Fixes: d850c2ee5fe2 ("iommu/vt-d: Expose ISA direct mapping region via iommu_get_resv_regions") Cc: sta...@vger.kernel.org # v5.3+ Link: https://lore.kernel.org/linux-iommu/20191211082304.2d4fa...@x1.home Reported-by: cprt Tested-by: cprt Signed-off-by: Alex Williamson Tested-by: Jerry Snitselaar Reviewed-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..6eb0dd7489a1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5737,7 +5737,7 @@ static void intel_iommu_get_resv_regions(struct device *device, if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { reg = iommu_alloc_resv_region(0, 1UL << 24, 0, - IOMMU_RESV_DIRECT); + IOMMU_RESV_DIRECT_RELAXABLE); if (reg) list_add_tail(>list, head); } ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Fri Dec 13 19, Lu Baolu wrote: Hi, On 12/13/19 8:30 AM, Jerry Snitselaar wrote: On Thu Dec 12 19, Lu Baolu wrote: Hi, On 12/12/19 9:49 AM, Jerry Snitselaar wrote: On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Can you please try this fix and check whether it can fix your problem? If it helps, do you mind adding a Tested-by? Best regards, baolu I'm testing with this patch, my patch that moves the direct mapping call, and Alex's patch for the ISA bridge. It solved the 2 iommu mapping errors I was seeing with default passthrough, I no longer see all the dmar pte read access errors, and the system boots allowing me to login. I'm tracking down 2 issues at the moment. With passthrough I see a problem with 01:00.4 that I mentioned in the earlier email: [ 78.978573] uhci_hcd: USB Universal Host Controller Interface driver [ 78.980842] uhci_hcd :01:00.4: UHCI Host Controller [ 78.982738] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 78.985222] uhci_hcd :01:00.4: detected 8 ports [ 78.986907] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 78.989316] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 78.994634] uhci_hcd :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 7 :01:00.4: unable to allocate consistent memory for frame list [ 79.499891] uhci_hcd :01:00.4: startup error -16 [ 79.501588] uhci_hcd :01:00.4: USB bus 3 deregistered [ 79.503494] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 79.505497] uhci_hcd: probe of :01:00.4 failed with error -16 If I boot the system with iommu=nopt I see an iommu map failure due to the prot check in __domain_mapping: [ 40.940589] pci :00:1f.0: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 40.943558] pci :00:1f.0: iommu_group_create_direct_mappings: iterating through mappings [ 40.946402] pci :00:1f.0: iommu_group_create_direct_mappings: calling apply_resv_region [ 40.949184] pci :00:1f.0: iommu_group_create_direct_mappings: entry type is direct [ 40.951819] DMAR: intel_iommu_map: enter [ 40.953128] DMAR: __domain_mapping: prot & (DMA_PTE_READ|DMA_PTE_WRITE) == 0 [ 40.955486] DMAR: domain_mapping: __domain_mapping failed [ 40.957348] DMAR: intel_iommu_map: domain_pfn_mapping returned -22 [ 40.959466] DMAR: intel_iommu_map: leave [ 40.959468] iommu: iommu_map: ops->map failed iova 0x0 pa 0x pgsize 0x1000 [ 40.963511] pci :00:1f.0: iommu_group_create_direct_mappings: iommu_map failed [ 40.966026] pci :00:1f.0: iommu_group_create_direct_mappings: leaving func [ 40.968487] pci :00:1f.0: iommu_group_add_device: calling __iommu_attach_device [ 40.971016] pci :00:1f.0: Adding to iommu group 19 [ 40.972731] pci :00:1f.0: DMAR: domain->type is dma /sys/kernel/iommu_groups/19 [root@hp-dl388g8-07 19]# cat reserved_regions 0x 0x00ff direct 0xbdf6e000 0xbdf84fff direct 0xfee0 0xfeef msi 00:1f.0 ISA bridge: Intel Corporation C600/X79 series chipset LPC Controller This seems to be another issue? 
Best regards, baolu In intel_iommu_get_resv_regions this iommu_alloc_resv_region is called with prot set to 0: if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { reg = iommu_alloc_resv_region(0, 1UL << 24, 0, IOMMU_RESV_DIRECT_RELAXABLE); if (reg) I wonder if this is an issue with the region starting at 0x0 and this bit in iommu_group_create_mappings: phys_addr = iommu_iova_to_phys(domain, addr); if (phys_addr) continue; Off to stick in some more debugging statements. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Thu Dec 12 19, Lu Baolu wrote: Hi, On 12/12/19 9:49 AM, Jerry Snitselaar wrote: On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Can you please try this fix and check whether it can fix your problem? If it helps, do you mind adding a Tested-by? Best regards, baolu I'm testing with this patch, my patch that moves the direct mapping call, and Alex's patch for the ISA bridge. It solved the 2 iommu mapping errors I was seeing with default passthrough, I no longer see all the dmar pte read access errors, and the system boots allowing me to login. I'm tracking down 2 issues at the moment. With passthrough I see a problem with 01:00.4 that I mentioned in the earlier email: [ 78.978573] uhci_hcd: USB Universal Host Controller Interface driver [ 78.980842] uhci_hcd :01:00.4: UHCI Host Controller [ 78.982738] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 78.985222] uhci_hcd :01:00.4: detected 8 ports [ 78.986907] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 78.989316] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 78.994634] uhci_hcd :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 7 :01:00.4: unable to allocate consistent memory for frame list [ 79.499891] uhci_hcd :01:00.4: startup error -16 [ 79.501588] uhci_hcd :01:00.4: USB bus 3 deregistered [ 79.503494] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 79.505497] uhci_hcd: probe of :01:00.4 failed with error -16 If I boot the system with iommu=nopt I see an iommu map failure due to the prot check in __domain_mapping: [ 40.940589] pci :00:1f.0: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 40.943558] pci :00:1f.0: iommu_group_create_direct_mappings: iterating through mappings [ 40.946402] pci :00:1f.0: iommu_group_create_direct_mappings: calling apply_resv_region [ 40.949184] pci :00:1f.0: iommu_group_create_direct_mappings: entry type is direct [ 40.951819] DMAR: intel_iommu_map: enter [ 40.953128] DMAR: __domain_mapping: prot & (DMA_PTE_READ|DMA_PTE_WRITE) == 0 [ 40.955486] DMAR: domain_mapping: __domain_mapping failed [ 40.957348] DMAR: intel_iommu_map: domain_pfn_mapping returned -22 [ 40.959466] DMAR: intel_iommu_map: leave [ 40.959468] iommu: iommu_map: ops->map failed iova 0x0 pa 0x pgsize 0x1000 [ 40.963511] pci :00:1f.0: iommu_group_create_direct_mappings: iommu_map failed [ 40.966026] pci :00:1f.0: iommu_group_create_direct_mappings: leaving func [ 40.968487] pci :00:1f.0: iommu_group_add_device: calling __iommu_attach_device [ 40.971016] pci :00:1f.0: Adding to iommu group 19 [ 40.972731] pci :00:1f.0: DMAR: domain->type is dma /sys/kernel/iommu_groups/19 [root@hp-dl388g8-07 19]# cat reserved_regions 0x 0x00ff direct 0xbdf6e000 0xbdf84fff direct 0xfee0 0xfeef msi 00:1f.0 ISA bridge: Intel Corporation C600/X79 series chipset LPC Controller --- drivers/iommu/intel-iommu.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..b73bebea9148 
100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5478,9 +5478,6 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return -EINVAL; - if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5523,8 +5520,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, )); - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5556,9 +5551,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - if (dm
[PATCH] iommu/vt-d: Allocate reserved region for ISA with correct permission
Currently the reserved region for ISA is allocated with no permissions. If a dma domain is being used, mapping this region will fail. Set the permissions to DMA_PTE_READ|DMA_PTE_WRITE. Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: sta...@vger.kernel.org # v5.3+ Fixes: d850c2ee5fe2 ("iommu/vt-d: Expose ISA direct mapping region via iommu_get_resv_regions") Signed-off-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..998529cebcf2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5736,7 +5736,7 @@ static void intel_iommu_get_resv_regions(struct device *device, struct pci_dev *pdev = to_pci_dev(device); if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { - reg = iommu_alloc_resv_region(0, 1UL << 24, 0, + reg = iommu_alloc_resv_region(0, 1UL << 24, prot, IOMMU_RESV_DIRECT); if (reg) list_add_tail(>list, head); -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Thu Dec 12 19, Jerry Snitselaar wrote: On Fri Dec 13 19, Lu Baolu wrote: Hi, On 12/13/19 8:30 AM, Jerry Snitselaar wrote: On Thu Dec 12 19, Lu Baolu wrote: Hi, On 12/12/19 9:49 AM, Jerry Snitselaar wrote: On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Can you please try this fix and check whether it can fix your problem? If it helps, do you mind adding a Tested-by? Best regards, baolu I'm testing with this patch, my patch that moves the direct mapping call, and Alex's patch for the ISA bridge. It solved the 2 iommu mapping errors I was seeing with default passthrough, I no longer see all the dmar pte read access errors, and the system boots allowing me to login. I'm tracking down 2 issues at the moment. With passthrough I see a problem with 01:00.4 that I mentioned in the earlier email: [ 78.978573] uhci_hcd: USB Universal Host Controller Interface driver [ 78.980842] uhci_hcd :01:00.4: UHCI Host Controller [ 78.982738] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 78.985222] uhci_hcd :01:00.4: detected 8 ports [ 78.986907] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 78.989316] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 78.994634] uhci_hcd :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 7 :01:00.4: unable to allocate consistent memory for frame list [ 79.499891] uhci_hcd :01:00.4: startup error -16 [ 79.501588] uhci_hcd :01:00.4: USB bus 3 deregistered [ 79.503494] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 79.505497] uhci_hcd: probe of :01:00.4 failed with error -16 If I boot the system with iommu=nopt I see an iommu map failure due to the prot check in __domain_mapping: [ 40.940589] pci :00:1f.0: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 40.943558] pci :00:1f.0: iommu_group_create_direct_mappings: iterating through mappings [ 40.946402] pci :00:1f.0: iommu_group_create_direct_mappings: calling apply_resv_region [ 40.949184] pci :00:1f.0: iommu_group_create_direct_mappings: entry type is direct [ 40.951819] DMAR: intel_iommu_map: enter [ 40.953128] DMAR: __domain_mapping: prot & (DMA_PTE_READ|DMA_PTE_WRITE) == 0 [ 40.955486] DMAR: domain_mapping: __domain_mapping failed [ 40.957348] DMAR: intel_iommu_map: domain_pfn_mapping returned -22 [ 40.959466] DMAR: intel_iommu_map: leave [ 40.959468] iommu: iommu_map: ops->map failed iova 0x0 pa 0x pgsize 0x1000 [ 40.963511] pci :00:1f.0: iommu_group_create_direct_mappings: iommu_map failed [ 40.966026] pci :00:1f.0: iommu_group_create_direct_mappings: leaving func [ 40.968487] pci :00:1f.0: iommu_group_add_device: calling __iommu_attach_device [ 40.971016] pci :00:1f.0: Adding to iommu group 19 [ 40.972731] pci :00:1f.0: DMAR: domain->type is dma /sys/kernel/iommu_groups/19 [root@hp-dl388g8-07 19]# cat reserved_regions 0x 0x00ff direct 0xbdf6e000 0xbdf84fff direct 0xfee0 0xfeef msi 00:1f.0 ISA bridge: Intel Corporation C600/X79 series chipset LPC Controller This seems to be another issue? 
Best regards, baolu In intel_iommu_get_resv_regions this iommu_alloc_resv_region is called with prot set to 0: if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { reg = iommu_alloc_resv_region(0, 1UL << 24, 0, IOMMU_RESV_DIRECT_RELAXABLE); if (reg) Looking at the older code for the ISA bridge it looks like it called iommu_prepare_identity_map -> domain_prepare_identity_map -> iommu_domain_identity_map -> and finally __domain_mapping with DMA_PTE_READ|DMA_PTE_WRITE? I wonder if this is an issue with the region starting at 0x0 and this bit in iommu_group_create_mappings: phys_addr = iommu_iova_to_phys(domain, addr); if (phys_addr) continue; Disregard this Off to stick in some more debugging statements. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.o
Re: panic in dmar_remove_one_dev_info
On Mon Dec 16 19, Jerry Snitselaar wrote: HP is seeing a panic on gen9 dl360 and dl560 while testing these other changes we've been working on. I just took an initial look, but have to run to a dentist appointment so couldn't dig too deep. It looks like the device sets dev->archdata.iommu to DEFER_DEVICE_DOMAIN_INFO in intel_iommu_add_device, and then it needs a private domain so dmar_remove_one_dev_info gets called. That code path ends up trying to use DEFER_DEVICE_DOMAIN_INFO as a pointer. I don't know if there just needs to be a check in there to bail out if it sees DEFER_DEVICE_DOMAIN_INFO, or if something more is needed. I'll look at it some more when I get back home. Regards, Jerry Hi Baolu, Does this look sane? --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(_domain_lock, flags); info = dev->archdata.iommu; - if (info) + if (info && info != DEFER_DEVICE_DOMAIN_INFO + && info != DUMMY_DEVICE_DOMAIN_INFO) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(_domain_lock, flags); } Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH] iommu/vt-d: avoid panic in __dmar_remove_one_dev_info
In addition to checking for a null pointer, verify that info does not have the value DEFER_DEVICE_DOMAIN_INFO or DUMMY_DEVICE_DOMAIN_INFO. If info has one of those values __dmar_remove_one_dev_info will panic when trying to access a member of the device_domain_info struct. [1.464241] BUG: unable to handle kernel NULL pointer dereference at 004e [1.464241] PGD 0 P4D 0 [1.464241] Oops: [#1] SMP PTI [1.464241] CPU: 0 PID: 1 Comm: swapper/0 Tainted: GW - - - 4.18.0-160.el8.x86_64 #1 [1.464241] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [1.464241] RIP: 0010:__dmar_remove_one_dev_info+0x27/0x250 [1.464241] Code: 00 00 00 0f 1f 44 00 00 8b 05 35 ec 75 01 41 56 41 55 41 54 55 53 85 c0 0f 84 99 01 00 00 48 85 ff 0f 84 92 01 00 00 48 89 fb <4c> 8b 67 50 48 8b 6f 58 $ [1.464241] RSP: :c90dfd10 EFLAGS: 00010082 [1.464241] RAX: 0001 RBX: fffe RCX: [1.464241] RDX: 0001 RSI: 0004 RDI: fffe [1.464241] RBP: 88ec7a72f368 R08: 0457 R09: 0039 [1.464241] R10: R11: c90dfa58 R12: 88ec7a0eec20 [1.464241] R13: 88ec6fd0eab0 R14: 81eae980 R15: [1.464241] FS: () GS:88ec7a60() knlGS: [1.464241] CS: 0010 DS: ES: CR0: 80050033 [1.464241] CR2: 004e CR3: 006c7900a001 C 001606b0 [1.464241] Call Trace: [1.464241] dmar_remove_one_dev_info.isra.68+0x27/0x40 [1.464241] intel_iommu_add_device+0x124/0x180 [1.464241] ? iommu_probe_device+0x40/0x40 [1.464241] add_iommu_group+0xa/0x20 [1.464241] bus_for_each_dev+0x77/0xc0 [1.464241] ? down_write+0xe/0x40 [1.464241] bus_set_iommu+0x85/0xc0 [1.464241] intel_iommu_init+0x4b4/0x777 [1.464241] ? e820__memblock_setup+0x63/0x63 [1.464241] ? do_early_param+0x91/0x91 [1.464241] pci_iommu_init+0x19/0x45 [1.464241] do_one_initcall+0x46/0x1c3 [1.464241] ? do_early_param+0x91/0x91 [1.464241] kernel_init_freeable+0x1af/0x258 [1.464241] ? 
rest_init+0xaa/0xaa [1.464241] kernel_init+0xa/0x107 [1.464241] ret_from_fork+0x35/0x40 [1.464241] Modules linked in: [1.464241] CR2: 004e [1.464241] ---[ end trace 0927d2ba8b8032b5 ]--- Cc: Joerg Roedel Cc: Lu Baolu Cc: David Woodhouse Cc: sta...@vger.kernel.org # v5.3+ Cc: iommu@lists.linux-foundation.org Fixes: ae23bfb68f28 ("iommu/vt-d: Detach domain before using a private one") Signed-off-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..e42a09794fa2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(_domain_lock, flags); info = dev->archdata.iommu; - if (info) + if (info && info != DEFER_DEVICE_DOMAIN_INFO + && info != DUMMY_DEVICE_DOMAIN_INFO) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(_domain_lock, flags); } -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] iommu/vt-d: avoid panic in __dmar_remove_one_dev_info
On Tue, Dec 17, 2019 at 10:56 AM Jerry Snitselaar wrote: > > In addition to checking for a null pointer, verify that > info does not have the value DEFER_DEVICE_DOMAIN_INFO or > DUMMY_DEVICE_DOMAIN_INFO. If info has one of those values > __dmar_remove_one_dev_info will panic when trying to access > a member of the device_domain_info struct. > > [1.464241] BUG: unable to handle kernel NULL pointer dereference at > 004e > [1.464241] PGD 0 P4D 0 > [1.464241] Oops: [#1] SMP PTI > [1.464241] CPU: 0 PID: 1 Comm: swapper/0 Tainted: GW > - - - 4.18.0-160.el8.x86_64 #1 > [1.464241] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, > BIOS P89 07/21/2019 > [1.464241] RIP: 0010:__dmar_remove_one_dev_info+0x27/0x250 > [1.464241] Code: 00 00 00 0f 1f 44 00 00 8b 05 35 ec 75 01 41 56 41 > 55 41 54 55 53 85 c0 0f 84 99 01 00 00 48 85 ff 0f 84 92 01 00 00 48 89 fb > <4c> 8b 67 50 48 8b 6f 58 $ > [1.464241] RSP: :c90dfd10 EFLAGS: 00010082 > [1.464241] RAX: 0001 RBX: fffe RCX: > > [1.464241] RDX: 0001 RSI: 0004 RDI: > fffe > [1.464241] RBP: 88ec7a72f368 R08: 0457 R09: > 0039 > [1.464241] R10: R11: c90dfa58 R12: > 88ec7a0eec20 > [1.464241] R13: 88ec6fd0eab0 R14: 81eae980 R15: > > [1.464241] FS: () GS:88ec7a60() > knlGS: > [1.464241] CS: 0010 DS: ES: CR0: 80050033 > [1.464241] CR2: 004e CR3: 006c7900a001 C > 001606b0 > [1.464241] Call Trace: > [1.464241] dmar_remove_one_dev_info.isra.68+0x27/0x40 > [1.464241] intel_iommu_add_device+0x124/0x180 > [1.464241] ? iommu_probe_device+0x40/0x40 > [1.464241] add_iommu_group+0xa/0x20 > [1.464241] bus_for_each_dev+0x77/0xc0 > [1.464241] ? down_write+0xe/0x40 > [1.464241] bus_set_iommu+0x85/0xc0 > [1.464241] intel_iommu_init+0x4b4/0x777 > [1.464241] ? e820__memblock_setup+0x63/0x63 > [1.464241] ? do_early_param+0x91/0x91 > [1.464241] pci_iommu_init+0x19/0x45 > [1.464241] do_one_initcall+0x46/0x1c3 > [1.464241] ? do_early_param+0x91/0x91 > [1.464241] kernel_init_freeable+0x1af/0x258 > [1.464241] ? 
rest_init+0xaa/0xaa > [1.464241] kernel_init+0xa/0x107 > [1.464241] ret_from_fork+0x35/0x40 > [1.464241] Modules linked in: > [1.464241] CR2: 004e > [1.464241] ---[ end trace 0927d2ba8b8032b5 ]--- > > Cc: Joerg Roedel > Cc: Lu Baolu > Cc: David Woodhouse > Cc: sta...@vger.kernel.org # v5.3+ > Cc: iommu@lists.linux-foundation.org > Fixes: ae23bfb68f28 ("iommu/vt-d: Detach domain before using a private one") > Signed-off-by: Jerry Snitselaar > --- > drivers/iommu/intel-iommu.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 0c8d81f56a30..e42a09794fa2 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) > > spin_lock_irqsave(_domain_lock, flags); > info = dev->archdata.iommu; > - if (info) > + if (info && info != DEFER_DEVICE_DOMAIN_INFO > + && info != DUMMY_DEVICE_DOMAIN_INFO) > __dmar_remove_one_dev_info(info); > spin_unlock_irqrestore(_domain_lock, flags); > } > -- > 2.24.0 > > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu > I'm not positive that the DUMMY_DEVICE_DOMAIN_INFO check is needed. It seemed like there were checks for that most places before dmar_remove_one_dev_info would be called, but I wasn't certain. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] iommu/vt-d: avoid panic in __dmar_remove_one_dev_info
On Tue Dec 17 19, Jerry Snitselaar wrote: On Tue Dec 17 19, Jerry Snitselaar wrote: In addition to checking for a null pointer, verify that info does not have the value DEFER_DEVICE_DOMAIN_INFO or DUMMY_DEVICE_DOMAIN_INFO. If info has one of those values __dmar_remove_one_dev_info will panic when trying to access a member of the device_domain_info struct. [1.464241] BUG: unable to handle kernel NULL pointer dereference at 004e [1.464241] PGD 0 P4D 0 [1.464241] Oops: [#1] SMP PTI [1.464241] CPU: 0 PID: 1 Comm: swapper/0 Tainted: GW - - - 4.18.0-160.el8.x86_64 #1 [1.464241] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [1.464241] RIP: 0010:__dmar_remove_one_dev_info+0x27/0x250 [1.464241] Code: 00 00 00 0f 1f 44 00 00 8b 05 35 ec 75 01 41 56 41 55 41 54 55 53 85 c0 0f 84 99 01 00 00 48 85 ff 0f 84 92 01 00 00 48 89 fb <4c> 8b 67 50 48 8b 6f 58 $ [1.464241] RSP: :c90dfd10 EFLAGS: 00010082 [1.464241] RAX: 0001 RBX: fffe RCX: [1.464241] RDX: 0001 RSI: 0004 RDI: fffe [1.464241] RBP: 88ec7a72f368 R08: 0457 R09: 0039 [1.464241] R10: R11: c90dfa58 R12: 88ec7a0eec20 [1.464241] R13: 88ec6fd0eab0 R14: 81eae980 R15: [1.464241] FS: () GS:88ec7a60() knlGS: [1.464241] CS: 0010 DS: ES: CR0: 80050033 [1.464241] CR2: 004e CR3: 006c7900a001 C 001606b0 [1.464241] Call Trace: [1.464241] dmar_remove_one_dev_info.isra.68+0x27/0x40 [1.464241] intel_iommu_add_device+0x124/0x180 [1.464241] ? iommu_probe_device+0x40/0x40 [1.464241] add_iommu_group+0xa/0x20 [1.464241] bus_for_each_dev+0x77/0xc0 [1.464241] ? down_write+0xe/0x40 [1.464241] bus_set_iommu+0x85/0xc0 [1.464241] intel_iommu_init+0x4b4/0x777 [1.464241] ? e820__memblock_setup+0x63/0x63 [1.464241] ? do_early_param+0x91/0x91 [1.464241] pci_iommu_init+0x19/0x45 [1.464241] do_one_initcall+0x46/0x1c3 [1.464241] ? do_early_param+0x91/0x91 [1.464241] kernel_init_freeable+0x1af/0x258 [1.464241] ? 
rest_init+0xaa/0xaa [1.464241] kernel_init+0xa/0x107 [1.464241] ret_from_fork+0x35/0x40 [1.464241] Modules linked in: [1.464241] CR2: 004e [1.464241] ---[ end trace 0927d2ba8b8032b5 ]--- Cc: Joerg Roedel Cc: Lu Baolu Cc: David Woodhouse Cc: sta...@vger.kernel.org # v5.3+ Cc: iommu@lists.linux-foundation.org Fixes: ae23bfb68f28 ("iommu/vt-d: Detach domain before using a private one") Signed-off-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..e42a09794fa2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(_domain_lock, flags); info = dev->archdata.iommu; - if (info) + if (info && info != DEFER_DEVICE_DOMAIN_INFO + && info != DUMMY_DEVICE_DOMAIN_INFO) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(_domain_lock, flags); } -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Nack this. Apparently the issue is just being seen with the kdump kernel. I'm wondering if it is already solved by 6c3a44ed3c55 ("iommu/vt-d: Turn off translations at shutdown"). Testing a 5.5 build now. And a minute later I got a response. The 5.5 kernel hits the original panic when booting into the kdump kernel. 
I need to test with this patch on 5.5, but with a test build of our kernel with this patch the problem just moves to: [3.742317] pci :01:00.0: Using iommu dma mapping [3.744020] pci :01:00.1: Adding to iommu group 86 [3.746697] NMI watchdog: Watchdog detected hard LOCKUP on cpu 0Modules linked in: [3.746697] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-167.el8.iommu6.x86_64 #1 [3.746697] Hardware name: HP ProLiant DL560 Gen9/ProLiant DL560 Gen9, BIOS P85 07/21/2019 [3.746697] RIP: 0010:native_queued_spin_lock_slowpath+0x5d/0x1d0 [3.746697] Code: 0f ba 2f 08 0f 92 c0 0f b6 c0 c1 e0 08 89 c2 8b 07 30 e4 09 d0 a9 00 01 ff ff 75 47 85 c$ [3.746697] RSP: :c90f3bd8 EFLAGS: 0002 [3.746697] RAX: 0101 RBX: 0046 RCX: 7f17 [3.746697] RDX: RSI: RDI: 82e8a600 [3.746697] RB
Re: [RFC PATCH] iommu/vt-d: avoid panic in __dmar_remove_one_dev_info
On Tue Dec 17 19, Jerry Snitselaar wrote: In addition to checking for a null pointer, verify that info does not have the value DEFER_DEVICE_DOMAIN_INFO or DUMMY_DEVICE_DOMAIN_INFO. If info has one of those values __dmar_remove_one_dev_info will panic when trying to access a member of the device_domain_info struct. [1.464241] BUG: unable to handle kernel NULL pointer dereference at 004e [1.464241] PGD 0 P4D 0 [1.464241] Oops: [#1] SMP PTI [1.464241] CPU: 0 PID: 1 Comm: swapper/0 Tainted: GW - - - 4.18.0-160.el8.x86_64 #1 [1.464241] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [1.464241] RIP: 0010:__dmar_remove_one_dev_info+0x27/0x250 [1.464241] Code: 00 00 00 0f 1f 44 00 00 8b 05 35 ec 75 01 41 56 41 55 41 54 55 53 85 c0 0f 84 99 01 00 00 48 85 ff 0f 84 92 01 00 00 48 89 fb <4c> 8b 67 50 48 8b 6f 58 $ [1.464241] RSP: :c90dfd10 EFLAGS: 00010082 [1.464241] RAX: 0001 RBX: fffe RCX: [1.464241] RDX: 0001 RSI: 0004 RDI: fffe [1.464241] RBP: 88ec7a72f368 R08: 0457 R09: 0039 [1.464241] R10: R11: c90dfa58 R12: 88ec7a0eec20 [1.464241] R13: 88ec6fd0eab0 R14: 81eae980 R15: [1.464241] FS: () GS:88ec7a60() knlGS: [1.464241] CS: 0010 DS: ES: CR0: 80050033 [1.464241] CR2: 004e CR3: 006c7900a001 C 001606b0 [1.464241] Call Trace: [1.464241] dmar_remove_one_dev_info.isra.68+0x27/0x40 [1.464241] intel_iommu_add_device+0x124/0x180 [1.464241] ? iommu_probe_device+0x40/0x40 [1.464241] add_iommu_group+0xa/0x20 [1.464241] bus_for_each_dev+0x77/0xc0 [1.464241] ? down_write+0xe/0x40 [1.464241] bus_set_iommu+0x85/0xc0 [1.464241] intel_iommu_init+0x4b4/0x777 [1.464241] ? e820__memblock_setup+0x63/0x63 [1.464241] ? do_early_param+0x91/0x91 [1.464241] pci_iommu_init+0x19/0x45 [1.464241] do_one_initcall+0x46/0x1c3 [1.464241] ? do_early_param+0x91/0x91 [1.464241] kernel_init_freeable+0x1af/0x258 [1.464241] ? 
rest_init+0xaa/0xaa [1.464241] kernel_init+0xa/0x107 [1.464241] ret_from_fork+0x35/0x40 [1.464241] Modules linked in: [1.464241] CR2: 004e [1.464241] ---[ end trace 0927d2ba8b8032b5 ]--- Cc: Joerg Roedel Cc: Lu Baolu Cc: David Woodhouse Cc: sta...@vger.kernel.org # v5.3+ Cc: iommu@lists.linux-foundation.org Fixes: ae23bfb68f28 ("iommu/vt-d: Detach domain before using a private one") Signed-off-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..e42a09794fa2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(_domain_lock, flags); info = dev->archdata.iommu; - if (info) + if (info && info != DEFER_DEVICE_DOMAIN_INFO + && info != DUMMY_DEVICE_DOMAIN_INFO) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(_domain_lock, flags); } -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Nack this. Apparently the issue is just being seen with the kdump kernel. I'm wondering if it is already solved by 6c3a44ed3c55 ("iommu/vt-d: Turn off translations at shutdown"). Testing a 5.5 build now. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Does this pose any potential issues with the reverse case where the group has a default identity domain, and the first device fits that, but a later device in the group needs dma and gets a private dma domain? Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu --- drivers/iommu/intel-iommu.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..b73bebea9148 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5478,9 +5478,6 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return -EINVAL; - if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5523,8 +5520,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. 
*/ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, )); - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5556,9 +5551,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, ); if (pte) phys = dma_pte_addr(pte); -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Fri Dec 06 19, Lu Baolu wrote: [snip] Can you please try below change? Let's check whether the offending address has been mapped for device 01.00.2. $ git diff diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..d9daf66be849 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -663,6 +663,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, ret = iommu_map(domain, addr, addr, pg_size, entry->prot); if (ret) goto out; + + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx] for group %d\n", addr, addr + pg_size, group->id); } } I am doubting that device 01.00.2 is not in the device scope of [4.485108] DMAR: RMRR base: 0x00bdf6f000 end: 0x00bdf7efff By the way, does device 01.00.2 work well after binding the driver? When I boot it with passthrough it doesn't get to a point where I can login. I think the serial console on these systems is tied to the ilo, so the conserver connection could be making things worse. Unfortunately the system is remote. I should have more time now to focus on debugging this. Attaching console output for the above patch. 
Regards, Jerry Best regards, baolu [ 21.833145] DMAR: dmar0: Using Queued invalidation [ 21.834960] DMAR: dmar1: Using Queued invalidation [ 21.837860] pci :00:00.0: Adding to iommu group 0 [ 21.839733] pci :00:01.0: Adding to iommu group 1 [ 21.841601] pci :00:01.1: Adding to iommu group 2 [ 21.843410] pci :00:02.0: Adding to iommu group 3 [ 21.845652] pci :00:02.1: Adding to iommu group 4 [ 21.847473] pci :00:02.2: Adding to iommu group 5 [ 21.849299] pci :00:02.3: Adding to iommu group 6 [ 21.851163] pci :00:03.0: Adding to iommu group 7 [ 21.852918] pci :00:03.1: Adding to iommu group 8 [ 21.854720] pci :00:03.2: Adding to iommu group 9 [ 21.856761] pci :00:03.3: Adding to iommu group 10 [ 21.858998] pci :00:04.0: Adding to iommu group 11 [ 21.860937] pci :00:04.1: Adding to iommu group 11 [ 21.863149] pci :00:04.2: Adding to iommu group 11 [ 21.864941] pci :00:04.3: Adding to iommu group 11 [ 21.866744] pci :00:04.4: Adding to iommu group 11 [ 21.868567] pci :00:04.5: Adding to iommu group 11 [ 21.870458] pci :00:04.6: Adding to iommu group 11 [ 21.872254] pci :00:04.7: Adding to iommu group 11 [ 21.874231] pci :00:05.0: Adding to iommu group 12 [ 21.876047] pci :00:05.2: Adding to iommu group 12 [ 21.877908] pci :00:05.4: Adding to iommu group 12 [ 21.879814] pci :00:11.0: Adding to iommu group 13 [ 21.881684] pci :00:1a.0: Adding to iommu group 14 [ 21.883516] pci :00:1c.0: Adding to iommu group 15 [ 21.885324] pci :00:1c.7: Adding to iommu group 16 [ 21.887116] pci :00:1d.0: Adding to iommu group 17 [ 21.888907] pci :00:1e.0: Adding to iommu group 18 [ 21.891913] pci :00:1e.0: Using iommu dma mapping [ 21.894001] pci :00:1f.0: Adding to iommu group 19 [ 21.895979] pci :00:1f.2: Adding to iommu group 19 [ 21.898870] pci :00:1f.2: DMAR: Setting identity map [0xe8000 - 0xe8fff] [ 21.901428] pci :00:1f.2: DMAR: Setting identity map [0xf4000 - 0xf4fff] [ 21.903790] pci :00:1f.2: DMAR: Setting identity map [0xbdf6e000 - 0xbdf6efff] [ 21.906347] pci :00:1f.2: 
DMAR: Setting identity map [0xbdf6f000 - 0xbdf7efff] [ 21.908885] pci :00:1f.2: DMAR: Setting identity map [0xbdf7f000 - 0xbdf82fff] [ 21.911620] pci :00:1f.2: DMAR: Setting identity map [0xbdf83000 - 0xbdf84fff] [ 21.914141] pci :00:1f.2: DMAR: Device uses a private dma domain. [ 21.916690] pci :03:00.0: Adding to iommu group 20 [ 21.919589] pci :03:00.0: Using iommu dma mapping [ 21.921708] pci :03:00.1: Setting identity map [0xe8000 - 0xe9000] for group 20 [ 21.924309] pci :03:00.1: Setting identity map [0xf4000 - 0xf5000] for group 20 [ 21.926854] pci :03:00.1: Setting identity map [0xbdf6e000 - 0xbdf6f000] for group 20 [ 21.929560] pci :03:00.1: Setting identity map [0xbdf6f000 - 0xbdf70000] for group 20 [ 21.932366] pci :03:00.1: Setting identity map [0xbdf70000 - 0xbdf71000] for group 20 [ 22.035075] pci :03:00.1: Setting identity map [0xbdf71000 - 0xbdf72000] for group 20 [ 22.437744] pci :03:00.1: Setting identity map [0xbdf72000 - 0xbdf73000] for group 20 [ 22.540574] pci :03:00.1: Setting identity map [0xbdf73000 - 0xbdf74000] for group 20 [ 22.543522] pci :03:00.1: Setting identity map [0xbdf74000 - 0xbdf75000] for group 20 [ 22.546270] pci :03:00.1: Setting identity map [0xbdf75000 - 0xbdf76000] for group 20 [ 22.548969] pci :03:00.1: Setting identity map [0xbdf76000 - 0xbdf77000] for group 20 [ 22.551801] pci :03:00.1: Setting identity map [0xbdf77000 - 0xbdf78000] for
panic in dmar_remove_one_dev_info
HP is seeing a panic on gen9 dl360 and dl560 while testing these other changes we've been working on. I just took an initial look, but have to run to a dentist appointment so couldn't dig too deep. It looks like the device sets dev->archdata.iommu to DEFER_DEVICE_DOMAIN_INFO in intel_iommu_add_device, and then it needs a private domain so dmar_remove_one_dev_info gets called. That code path ends up trying to use DEFER_DEVICE_DOMAIN_INFO as a pointer. I don't know if there just needs to be a check in there to bail out if it sees DEFER_DEVICE_DOMAIN_INFO, or if something more is needed. I'll look at it some more when I get back home. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu: set group default domain before creating direct mappings
iommu_group_create_direct_mappings uses group->default_domain, but right after it is called, request_default_domain_for_dev calls iommu_domain_free for the default domain, and sets the group default domain to a different domain. Move the iommu_group_create_direct_mappings call to after the group default domain is set, so the direct mappings get associated with that domain. Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: sta...@vger.kernel.org Fixes: 7423e01741dd ("iommu: Add API to request DMA domain for device") Signed-off-by: Jerry Snitselaar --- drivers/iommu/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..fa908179b80b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2282,13 +2282,13 @@ request_default_domain_for_dev(struct device *dev, unsigned long type) goto out; } - iommu_group_create_direct_mappings(group, dev); - /* Make the domain the default for this group */ if (group->default_domain) iommu_domain_free(group->default_domain); group->default_domain = domain; + iommu_group_create_direct_mappings(group, dev); + dev_info(dev, "Using iommu %s mapping\n", type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Sun Dec 08 19, Lu Baolu wrote: Hi, On 12/7/19 10:41 AM, Jerry Snitselaar wrote: On Fri Dec 06 19, Jerry Snitselaar wrote: On Sat Dec 07 19, Lu Baolu wrote: Hi Jerry, On 12/6/19 3:24 PM, Jerry Snitselaar wrote: On Fri Dec 06 19, Lu Baolu wrote: [snip] Can you please try below change? Let's check whether the afending address has been mapped for device 01.00.2. $ git diff diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..d9daf66be849 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -663,6 +663,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, ret = iommu_map(domain, addr, addr, pg_size, entry->prot); if (ret) goto out; + + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx] for group %d\n", addr, addr + pg_size, group->id); } } I am doubting that device 01.00.2 is not in the device scope of [ 4.485108] DMAR: RMRR base: 0x00bdf6f000 end: 0x00bdf7efff By the way, does device 01.00.2 works well after binding the driver? When I boot it with passthrough it doesn't get to a point where I can login. I think the serial console on these systems is tied to the ilo, so the conserver connection could be making things worse. Unfortunately the system is remote. I should have more time now to focus on debugging this. Attaching console output for the above patch. It seems that device 01.00.2 isn't in the scope of RMRR [base: 0x00bdf6f000 end: 0x00bdf7efff]. But it still tries to access the address within it, hence faults generated. You can check it with ACPI/DMAR table. Best regards, baolu I believe it is the 3rd endpoint device entry in dmar data below. So question about request_default_domain_for_dev. Since a dma mapping is already done for 1.00.0, and that sets the default_domain for the group (I think), won't it bail out for 1.00.2 at this check? if (group->default_domain && group->default_domain->type == type) goto out; Or I guess request_default_domain_for_dev wouldn't even be called for 1.00.2. 
intel_iommu_add_device it wouldn't even call one of the request functions with 1.00.2 since domain->type would be dma from 1.00.0, and device_def_domain_type should return dma. Can you please add some debug messages and check what really happens here? Best regards, baolu [ 25.000544] pci :01:00.0: Adding to iommu group 25 [ 25.502243] pci :01:00.0: DMAR: domain->type is identity << intel_iommu_add_device (alloced in iommu_group_get_for_dev) [ 25.504239] pci :01:00.0: DMAR: device default domain type is dma. requesting dma domain << intel_iommu_add_device [ 25.507954] pci :01:00.0: Using iommu dma mapping<< request_default_domain_for_dev (now default domain for group is dma) [ 25.509765] pci :01:00.1: Adding to iommu group 25 [ 25.511514] pci :01:00.1: DMAR: domain->type is dma << intel_iommu_add_device [ 25.513263] pci :01:00.1: DMAR: device default domain type is identity. requesting identity domain << intel_iommu_add_device [ 25.516435] pci :01:00.1: don't change mappings of existing devices. << request_default_domain_for_dev [ 25.518669] pci :01:00.1: DMAR: Device uses a private identity domain. << intel_iommu_add_device [ 25.521061] pci :01:00.2: Adding to iommu group 25 [ 25.522791] pci :01:00.2: DMAR: domain->type is dma << intel_iommu_add_device [ 25.524706] pci :01:00.4: Adding to iommu group 25 [ 25.526458] pci :01:00.4: DMAR: domain->type is dma << intel_iommu_add_device [ 25.528213] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain << intel_iommu_add_device [ 25.531284] pci :01:00.4: don't change mappings of existing devices. << request_default_domain_for_dev [ 25.533500] pci :01:00.4: DMAR: Device uses a private identity domain. << intel_iommu_add_device So the domain type is dma after 01:00.0 gets added, and when intel_iommu_add_device is called for 01:00.2 it will go into the if section. Since the device default domain type for 01:00.2 is dma nothing happens in there, and it goes on to 01:00.4. 
Is the "private identity domain" message really accurate since everyone will use si_domain? Adding some more debugging. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Fix dmar pte read access not set error
On Wed Dec 11 19, Lu Baolu wrote: If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys come through iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since identity domain has been mapped with the whole available memory space and RMRRs, we don't need to worry about the impact on it. Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Cc: sta...@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0c8d81f56a30..b73bebea9148 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5478,9 +5478,6 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return -EINVAL; - if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5523,8 +5520,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. 
*/ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, )); - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5556,9 +5551,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, ); if (pte) phys = dma_pte_addr(pte); -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/amd: Pass gfp flags to iommu_map_page() in amd_iommu_map()
On Fri Oct 18 19, Joerg Roedel wrote: From: Joerg Roedel A recent commit added a gfp parameter to amd_iommu_map() to make it callable from atomic context, but forgot to pass it down to iommu_map_page() and left GFP_KERNEL there. This caused sleep-while-atomic warnings and needs to be fixed. Reported-by: Qian Cai Reported-by: Dan Carpenter Fixes: 781ca2de89ba ("iommu: Add gfp parameter to iommu_ops::map") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 0d2479546b77..fb54df5c2e11 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2561,7 +2561,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); + ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); domain_flush_np_cache(domain, iova, page_size); -- 2.16.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH -next] iommu/amd: fix a warning in increase_address_space
On Wed Oct 16 19, Jerry Snitselaar wrote: On Wed Oct 16 19, Qian Cai wrote: BTW, Joerg, this line from the commit "iommu/amd: Remove domain->updated" looks suspicious. Not sure what the purpose of it. *updated = increase_address_space(domain, gfp) || *updated; Looking at it again I think that isn't an issue really, it would just not lose updated being set in a previous loop iteration, but now I'm wondering about the loop itself. In the cases where it would return false, how does the evaluation of the condition for the while loop change? I guess the mode level 6 check is really for other potential callers increase_address_space, none exist at the moment, and the condition of the while loop in alloc_pte should fail if the mode level is 6.
Re: [PATCH -next] iommu/amd: fix a warning in increase_address_space
On Wed Oct 16 19, Qian Cai wrote: After the commit 754265bcab78 ("iommu/amd: Fix race in increase_address_space()"), it could still possible trigger a race condition under some heavy memory pressure below. The race to trigger a warning is, CPU0: CPU1: in alloc_pte(): in increase_address_space(): while (address > PM_LEVEL_SIZE(domain->mode)) [1] spin_lock_irqsave(>lock domain->mode+= 1; spin_unlock_irqrestore(>lock in increase_address_space(): spin_lock_irqsave(>lock if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) [1] domain->mode = 5 It is unclear the triggering of the warning is the root cause of the smartpqi offline yet, but let's fix it first by lifting the locking. WARNING: CPU: 57 PID: 124314 at drivers/iommu/amd_iommu.c:1474 iommu_map_page+0x718/0x7e0 smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec flags=0x0010] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec1000 flags=0x0010] CPU: 57 PID: 124314 Comm: oom01 Tainted: G O Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 RIP: 0010:iommu_map_page+0x718/0x7e0 Code: 88 a5 70 ff ff ff e9 5d fa ff ff 48 8b b5 70 ff ff ff 4c 89 ef e8 08 32 2f 00 41 80 fc 01 0f 87 b7 3d 00 00 41 83 e4 01 eb be <0f> 0b 48 8b b5 70 ff ff ff 4c 89 ef e8 e7 31 2f 00 eb dd 0f 0b 48 RSP: 0018:888da4816cb8 EFLAGS: 00010046 RAX: RBX: 8885fe689000 RCX: 96f4a6c4 RDX: 0007 RSI: dc00 RDI: 8885fe689124 RBP: 888da4816da8 R08: ed10bfcd120e R09: ed10bfcd120e R10: ed10bfcd120d R11: 8885fe68906b R12: smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec1a00 flags=0x0010] R13: 8885fe689068 R14: 8885fe689124 R15: smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec1e00 flags=0x0010] FS: 7f29722ba700() GS:88902f88() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7f27f82d8000 CR3: 00102ed9c000 CR4: 003406e0 Call Trace: smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec2000 
flags=0x0010] map_sg+0x1ce/0x2f0 smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec2400 flags=0x0010] scsi_dma_map+0xd7/0x160 pqi_raid_submit_scsi_cmd_with_io_request+0x1b8/0x420 [smartpqi] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec2800 flags=0x0010] pqi_scsi_queue_command+0x8ab/0xe00 [smartpqi] smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec2c00 flags=0x0010] scsi_queue_rq+0xd19/0x1360 smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec3000 flags=0x0010] __blk_mq_try_issue_directly+0x295/0x3f0 smartpqi :23:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x address=0xffec3400 flags=0x0010] AMD-Vi: Event logged [IO_PAGE_FAULT device=23:00.0 domain=0x address=0xffec3800 flags=0x0010] blk_mq_request_issue_directly+0xb5/0x100 AMD-Vi: Event logged [IO_PAGE_FAULT device=23:00.0 domain=0x address=0xffec3c00 flags=0x0010] blk_mq_try_issue_list_directly+0xa9/0x160 blk_mq_sched_insert_requests+0x228/0x380 blk_mq_flush_plug_list+0x448/0x7e0 blk_flush_plug_list+0x1eb/0x230 blk_finish_plug+0x43/0x5d shrink_node_memcg+0x9c5/0x1550 smartpqi :23:00.0: controller is offline: status code 0x14803 smartpqi :23:00.0: controller offline Fixes: 754265bcab78 ("iommu/amd: Fix race in increase_address_space()") Signed-off-by: Qian Cai --- BTW, Joerg, this line from the commit "iommu/amd: Remove domain->updated" looks suspicious. Not sure what the purpose of it. *updated = increase_address_space(domain, gfp) || *updated; Looking at it again I think that isn't an issue really, it would just not lose updated being set in a previous loop iteration, but now I'm wondering about the loop itself. In the cases where it would return false, how does the evaluation of the condition for the while loop change? 
drivers/iommu/amd_iommu.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2369b8af81f3..a5754068aa29 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1465,12 +1465,9 @@ static void free_pagetable(struct protection_domain *domain) static bool increase_address_space(struct protection_domain *domain, gfp_t gfp) { - unsigned long flags; bool ret = false; u64 *pte; - spin_lock_irqsave(>lock, flags); - if (WARN_ON_ONCE(domain->mode ==
Re: [PATCH] iommu/amd: Check PM_LEVEL_SIZE() condition in locked section
On Fri Oct 18 19, Joerg Roedel wrote: On Thu, Oct 17, 2019 at 07:36:51AM -0400, Qian Cai wrote: > On Oct 16, 2019, at 6:59 PM, Jerry Snitselaar wrote: > > I guess the mode level 6 check is really for other potential callers > increase_address_space, none exist at the moment, and the condition > of the while loop in alloc_pte should fail if the mode level is 6. Because there is no locking around iommu_map_page(), if there are several concurrent callers of it for the same domain, could it be that it silently corrupt data due to invalid access? No, that can't happen because increase_address_space locks the domain before actually doing anything. So the address space can't grow above domain->mode == 6. But what can happen is that the WARN_ON_ONCE triggers in there and that the address space is increased multiple times when only one increase would be sufficient. To fix this we just need to check the PM_LEVEL_SIZE() condition again when we hold the lock: From e930e792a998e89dfd4feef15fbbf289c45124dc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 18 Oct 2019 11:34:22 +0200 Subject: [PATCH] iommu/amd: Check PM_LEVEL_SIZE() condition in locked section The increase_address_space() function has to check the PM_LEVEL_SIZE() condition again under the domain->lock to avoid a false trigger of the WARN_ON_ONCE() and to avoid that the address space is increase more often than necessary. Reported-by: Qian Cai Fixes: 754265bcab78 ("iommu/amd: Fix race in increase_address_space()") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2369b8af81f3..a0639e511ffe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1463,6 +1463,7 @@ static void free_pagetable(struct protection_domain *domain) * to 64 bits. 
*/ static bool increase_address_space(struct protection_domain *domain, + unsigned long address, gfp_t gfp) { unsigned long flags; @@ -1471,8 +1472,8 @@ static bool increase_address_space(struct protection_domain *domain, spin_lock_irqsave(>lock, flags); - if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) - /* address space already 64 bit large */ + if (address <= PM_LEVEL_SIZE(domain->mode) || + WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); @@ -1505,7 +1506,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, gfp) || *updated; + *updated = increase_address_space(domain, address, gfp) || *updated; level = domain->mode - 1; pte = >pt_root[PM_LEVEL_INDEX(level, address)]; -- 2.16.4 Reviewed-by: Jerry Snitselaar
Re: [PATCH v3] iommu: fix KASAN use-after-free in iommu_insert_resv_region
On Tue Nov 26 19, Eric Auger wrote: In case the new region gets merged into another one, the nr list node is freed. Checking its type while completing the merge algorithm leads to a use-after-free. Use new->type instead. Fixes: 4dbd258ff63e ("iommu: Revisit iommu_insert_resv_region() implementation") Signed-off-by: Eric Auger Reported-by: Qian Cai Cc: Stable #v5.3+ Minor nit, but should the comment above list_for_each_entry_safe get updated as well? Other than that, lgtm. Reviewed-by: Jerry Snitselaar --- v2 -> v3: - directly use new->type v1 -> v2: - remove spurious new line --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d658c7c6a2ab..285ad4a4c7f2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -313,7 +313,7 @@ int iommu_insert_resv_region(struct iommu_resv_region *new, phys_addr_t top_end, iter_end = iter->start + iter->length - 1; /* no merge needed on elements of different types than @nr */ - if (iter->type != nr->type) { + if (iter->type != new->type) { list_move_tail(>list, ); continue; } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Mon Dec 02 19, Jerry Snitselaar wrote: On Mon Dec 02 19, Lu Baolu wrote: Hi, On 12/2/19 2:34 PM, Jerry Snitselaar wrote: We are seeing DMAR PTE read access not set errors when booting a kernel with default passthrough, both with a test kernel and with a 5.4.0 kernel. Previously we would see a number of identity mappings being set related to the rmrrs, and now they aren't seen and we get the dmar pte errors as devices touch those regions. From what I can tell currently df4f3c603aeb ("iommu/vt-d: Remove static identity map code") removed the bit of code in init_dmars that used to set up those mappings: - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } si_domain_init now has code that sets identity maps for devices in rmrrs, but only for certain devices. On which device, are you seeing this error? Is it a rmrr locked device? Best regards, baolu Almost all of the messages are for the ilo, but there also is a message for the smart array raid bus controller. Also seeing it with a dl380 gen9 system, where the raid bus controller is getting the error. With iommu=nopt, the system boots up without issue. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Mon Dec 02 19, Lu Baolu wrote: Hi, On 12/2/19 2:34 PM, Jerry Snitselaar wrote: We are seeing DMAR PTE read access not set errors when booting a kernel with default passthrough, both with a test kernel and with a 5.4.0 kernel. Previously we would see a number of identity mappings being set related to the rmrrs, and now they aren't seen and we get the dmar pte errors as devices touch those regions. From what I can tell currently df4f3c603aeb ("iommu/vt-d: Remove static identity map code") removed the bit of code in init_dmars that used to set up those mappings: - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } si_domain_init now has code that sets identity maps for devices in rmrrs, but only for certain devices. On which device, are you seeing this error? Is it a rmrr locked device? Best regards, baolu Almost all of the messages are for the ilo, but there also is a message for the smart array raid bus controller. With iommu=nopt, the system boots up without issue. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
dmar pte read access not set error messages on hp dl388 gen8 systems
We are seeing DMAR PTE read access not set errors when booting a kernel with default passthrough, both with a test kernel and with a 5.4.0 kernel. Previously we would see a number of identity mappings being set related to the rmrrs, and now they aren't seen and we get the dmar pte errors as devices touch those regions. From what I can tell currently df4f3c603aeb ("iommu/vt-d: Remove static identity map code") removed the bit of code in init_dmars that used to set up those mappings: - /* -* For each rmrr -* for each dev attached to rmrr -* do -* locate drhd for dev, alloc domain for dev -* allocate free domain -* allocate page table entries for rmrr -* if context not allocated for bus -* allocate and init context -* set present in root table for this bus -* init context with domain, translation etc -*endfor -* endfor -*/ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } si_domain_init now has code that sets identity maps for devices in rmrrs, but only for certain devices. With iommu=nopt, the system boots up without issue. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 6/6] iommu/amd: Switch to use acpi_dev_hid_uid_match()
On Tue Sep 24 19, Andy Shevchenko wrote: Since we have a generic helper, drop custom implementation in the driver. Signed-off-by: Andy Shevchenko --- Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/2] iommu/vt-d: Select PCI_PRI for INTEL_IOMMU_SVM
On Wed Oct 09 19, Bjorn Helgaas wrote: From: Bjorn Helgaas I think intel-iommu.c depends on CONFIG_AMD_IOMMU in an undesirable way: When CONFIG_INTEL_IOMMU_SVM=y, iommu_enable_dev_iotlb() calls PRI interfaces (pci_reset_pri() and pci_enable_pri()), but those are only implemented when CONFIG_PCI_PRI is enabled. If CONFIG_PCI_PRI is not enabled, there are stubs that just return failure. The INTEL_IOMMU_SVM Kconfig does nothing with PCI_PRI, but AMD_IOMMU selects PCI_PRI. So if AMD_IOMMU is enabled, intel-iommu.c gets the full PRI interfaces. If AMD_IOMMU is not enabled, it gets the PRI stubs. This seems wrong. The first patch here makes INTEL_IOMMU_SVM select PCI_PRI so intel-iommu.c always gets the full PRI interfaces. The second patch moves pci_prg_resp_pasid_required(), which simply returns a bit from the PCI capability, from #ifdef CONFIG_PCI_PASID to #ifdef CONFIG_PCI_PRI. This is related because INTEL_IOMMU_SVM already *does* select PCI_PASID, so it previously always got pci_prg_resp_pasid_required() even though it got stubs for other PRI things. Since these are related and I have several follow-on ATS-related patches in the queue, I'd like to take these both via the PCI tree. Bjorn Helgaas (2): iommu/vt-d: Select PCI_PRI for INTEL_IOMMU_SVM PCI/ATS: Move pci_prg_resp_pasid_required() to CONFIG_PCI_PRI drivers/iommu/Kconfig | 1 + drivers/pci/ats.c | 55 +++-- include/linux/pci-ats.h | 11 - 3 files changed, 31 insertions(+), 36 deletions(-) -- 2.23.0.581.g78d2f28ef7-goog ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 6/6] iommu/amd: Switch to use acpi_dev_hid_uid_match()
On Tue Oct 01 19, Andy Shevchenko wrote: Since we have a generic helper, drop custom implementation in the driver. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- drivers/iommu/amd_iommu.c | 30 +- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2369b8af81f3..40f3cf44aa98 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -124,30 +124,6 @@ static struct lock_class_key reserved_rbtree_key; * / -static inline int match_hid_uid(struct device *dev, - struct acpihid_map_entry *entry) -{ - struct acpi_device *adev = ACPI_COMPANION(dev); - const char *hid, *uid; - - if (!adev) - return -ENODEV; - - hid = acpi_device_hid(adev); - uid = acpi_device_uid(adev); - - if (!hid || !(*hid)) - return -ENODEV; - - if (!uid || !(*uid)) - return strcmp(hid, entry->hid); - - if (!(*entry->uid)) - return strcmp(hid, entry->hid); - - return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid)); -} - static inline u16 get_pci_device_id(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -158,10 +134,14 @@ static inline u16 get_pci_device_id(struct device *dev) static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { + struct acpi_device *adev = ACPI_COMPANION(dev); struct acpihid_map_entry *p; + if (!adev) + return -ENODEV; + list_for_each_entry(p, _map, list) { - if (!match_hid_uid(dev, p)) { + if (acpi_dev_hid_uid_match(adev, p->hid, p->uid)) { if (entry) *entry = p; return p->devid; -- 2.23.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/vt-d: Don't reject nvme host due to scope mismatch
On Fri Dec 20 19, jimyan wrote: On a system with an Intel PCIe port configured as a nvme host device, iommu initialization fails with DMAR: Device scope type does not match for :80:00.0 This is because the DMAR table reports this device as having scope 2 (ACPI_DMAR_SCOPE_TYPE_BRIDGE): Isn't that a problem to be fixed in the DMAR table then? but the device has a type 0 PCI header: 80:00.0 Class 0600: Device 8086:2020 (rev 06) 00: 86 80 20 20 47 05 10 00 06 00 00 06 10 00 00 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 00 00 30: 00 00 00 00 90 00 00 00 00 00 00 00 00 01 00 00 VT-d works perfectly on this system, so there's no reason to bail out on initialization due to this apparent scope mismatch. Add the class 0x600 ("PCI_CLASS_BRIDGE_HOST") as a heuristic for allowing DMAR initialization for non-bridge PCI devices listed with scope bridge. Signed-off-by: jimyan --- drivers/iommu/dmar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index eecd6a421667..9faf2f0e0237 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -244,6 +244,7 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) || (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE && (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + info->dev->class >> 8 != PCI_CLASS_BRIDGE_HOST && info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) { pr_warn("Device scope type does not match for %s\n", pci_name(info->dev)); -- 2.11.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Mon Dec 09 19, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: [snip] A call to iommu_map is failing. [ 36.686881] pci :01:00.2: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 36.689843] pci :01:00.2: iommu_group_create_direct_mappings: iterating through mappings [ 36.692757] pci :01:00.2: iommu_group_create_direct_mappings: calling apply_resv_region [ 36.695526] pci :01:00.2: e_direct_mappings: entry type is direct [ 37.198053] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.201357] pci :01:00.2: iommu_group_create_direct_mappings: iommu_map failed [ 37.203973] pci :01:00.2: iommu_group_create_direct_mappings: leaving func [ 37.206385] pci :01:00.2: iommu_group_add_device: calling __iommu_attach_device [ 37.208950] pci :01:00.2: Adding to iommu group 25 [ 37.210660] pci :01:00.2: DMAR: domain->type is dma It bails at the dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN check at the beginning of intel_iommu_map. I will verify, but it looks like that is getting set when intel_iommu_add_device is called for 01:00.1. request_default_domain_for_dev for 01:00.1 will return -EBUSY because iommu_group_device_count(group) != 1. Also I see 01:00.0 and others that are the first in a group exiting iommu_group_create_direct_mappings at the (!domain || domain->type != IOMMU_DOMAIN_DMA) check. In request_default_domain_for_dev default_domain doesn't getting set until after that call. Should the iommu_group_create_direct_mappings call be moved below where group->default_domain gets set? 
Also fails for 01:00.4: [ 37.212448] pci :01:00.4: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 37.215382] pci :01:00.4: iommu_group_create_direct_mappings: iterating through mappings [ 37.218170] pci :01:00.4: iommu_group_create_direct_mappings: calling apply_resv_region [ 37.220933] pci :01:00.4: iommu_group_create_direct_mappings: entry type is direct-relaxable [ 37.223932] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.226857] pci :01:00.4: iommu_group_create_direct_mappings: iommu_map failed [ 37.229300] pci :01:00.4: iommu_group_create_direct_mappings: leaving func [ 37.231648] pci :01:00.4: iommu_group_add_device: calling __iommu_attach_device [ 37.234194] pci :01:00.4: Adding to iommu group 25 [ 37.236192] pci :01:00.4: DMAR: domain->type is dma [ 37.237958] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain [ 37.241061] pci :01:00.4: don't change mappings of existing d37.489870] pci :01:00.4: DMAR: Device uses a private identity domain. 
There is an RMRR for 0xbddde000-0xbdddefff: [63Ah 1594 2]Subtable Type : 0001 [Reserved Memory Region] [63Ch 1596 2] Length : 0036 [63Eh 1598 2] Reserved : [640h 1600 2] PCI Segment Number : [642h 1602 8] Base Address : BDDDE000 [64Ah 1610 8] End Address (limit) : BDDDEFFF [652h 1618 1]Device Scope Type : 01 [PCI Endpoint Device] [653h 1619 1] Entry Length : 0A [654h 1620 2] Reserved : [656h 1622 1] Enumeration ID : 00 [657h 1623 1] PCI Bus Number : 00 [658h 1624 2] PCI Path : 1C,07 [65Ah 1626 2] PCI Path : 00,00 [65Ch 1628 1]Device Scope Type : 01 [PCI Endpoint Device] [65Dh 1629 1] Entry Length : 0A [65Eh 1630 2] Reserved : [660h 1632 1] Enumeration ID : 00 [661h 1633 1] PCI Bus Number : 00 [662h 1634 2] PCI Path : 1C,07 [664h 1636 2] PCI Path : 00,02 [666h 1638 1]Device Scope Type : 01 [PCI Endpoint Device] [667h 1639 1] Entry Length : 0A [668h 1640 2] Reserved : [66Ah 1642 1] Enumeration ID : 00 [66Bh 1643 1] PCI Bus Number : 00 [66Ch 1644 2] PCI Path : 1C,07 [66Eh 1646 2] PCI Path : 00,04 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Tue Dec 10 19, Lu Baolu wrote: Hi, On 12/10/19 8:52 AM, Jerry Snitselaar wrote: On Sun Dec 08 19, Lu Baolu wrote: Hi, On 12/7/19 10:41 AM, Jerry Snitselaar wrote: On Fri Dec 06 19, Jerry Snitselaar wrote: On Sat Dec 07 19, Lu Baolu wrote: Hi Jerry, On 12/6/19 3:24 PM, Jerry Snitselaar wrote: On Fri Dec 06 19, Lu Baolu wrote: [snip] Can you please try below change? Let's check whether the afending address has been mapped for device 01.00.2. $ git diff diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db7bfd4f2d20..d9daf66be849 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -663,6 +663,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, ret = iommu_map(domain, addr, addr, pg_size, entry->prot); if (ret) goto out; + + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx] for group %d\n", addr, addr + pg_size, group->id); } } I am doubting that device 01.00.2 is not in the device scope of [ 4.485108] DMAR: RMRR base: 0x00bdf6f000 end: 0x00bdf7efff By the way, does device 01.00.2 works well after binding the driver? When I boot it with passthrough it doesn't get to a point where I can login. I think the serial console on these systems is tied to the ilo, so the conserver connection could be making things worse. Unfortunately the system is remote. I should have more time now to focus on debugging this. Attaching console output for the above patch. It seems that device 01.00.2 isn't in the scope of RMRR [base: 0x00bdf6f000 end: 0x00bdf7efff]. But it still tries to access the address within it, hence faults generated. You can check it with ACPI/DMAR table. Best regards, baolu I believe it is the 3rd endpoint device entry in dmar data below. So question about request_default_domain_for_dev. Since a dma mapping is already done for 1.00.0, and that sets the default_domain for the group (I think), won't it bail out for 1.00.2 at this check? 
if (group->default_domain && group->default_domain->type == type) goto out; Or I guess request_default_domain_for_dev wouldn't even be called for 1.00.2. intel_iommu_add_device it wouldn't even call one of the request functions with 1.00.2 since domain->type would be dma from 1.00.0, and device_def_domain_type should return dma. Can you please add some debug messages and check what really happens here? Best regards, baolu [ 25.000544] pci :01:00.0: Adding to iommu group 25 [ 25.502243] pci :01:00.0: DMAR: domain->type is identity << intel_iommu_add_device (alloced in iommu_group_get_for_dev) [ 25.504239] pci :01:00.0: DMAR: device default domain type is dma. requesting dma domain << intel_iommu_add_device [ 25.507954] pci :01:00.0: Using iommu dma mapping << request_default_domain_for_dev (now default domain for group is dma) [ 25.509765] pci :01:00.1: Adding to iommu group 25 [ 25.511514] pci :01:00.1: DMAR: domain->type is dma << intel_iommu_add_device [ 25.513263] pci :01:00.1: DMAR: device default domain type is identity. requesting identity domain << intel_iommu_add_device [ 25.516435] pci :01:00.1: don't change mappings of existing devices. << request_default_domain_for_dev [ 25.518669] pci :01:00.1: DMAR: Device uses a private identity domain. << intel_iommu_add_device [ 25.521061] pci :01:00.2: Adding to iommu group 25 [ 25.522791] pci :01:00.2: DMAR: domain->type is dma << intel_iommu_add_device [ 25.524706] pci :01:00.4: Adding to iommu group 25 [ 25.526458] pci :01:00.4: DMAR: domain->type is dma << intel_iommu_add_device [ 25.528213] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain << intel_iommu_add_device [ 25.531284] pci :01:00.4: don't change mappings of existing devices. << request_default_domain_for_dev [ 25.533500] pci :01:00.4: DMAR: Device uses a private identity domain. 
<< intel_iommu_add_device So the domain type is dma after 01:00.0 gets added, and when intel_iommu_add_device is called for 01:00.2 it will go into the if section. Since the device default domain type for 01:00.2 is dma nothing happens in there, and it goes on to 01:00.4. Is the "private identity domain" message really accurate since everyone will use si_domain? Adding some more debugging. The facts that we have seen: 1) 01.00.2 uses the default domain in group 25. The domain type of this default domain is DMA. 2) iommu_group_create_direct_mappings() *should* be called when adding 01.00.2 into group 25. As the result, RMRR for this device *should* be identity mapped. 3) By checking DMAR table, RMRR (0x00bdf6f000 ~ 0
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Mon Dec 09 19, Jerry Snitselaar wrote: [snip] A call to iommu_map is failing. [ 36.686881] pci :01:00.2: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 36.689843] pci :01:00.2: iommu_group_create_direct_mappings: iterating through mappings [ 36.692757] pci :01:00.2: iommu_group_create_direct_mappings: calling apply_resv_region [ 36.695526] pci :01:00.2: e_direct_mappings: entry type is direct [ 37.198053] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.201357] pci :01:00.2: iommu_group_create_direct_mappings: iommu_map failed [ 37.203973] pci :01:00.2: iommu_group_create_direct_mappings: leaving func [ 37.206385] pci :01:00.2: iommu_group_add_device: calling __iommu_attach_device [ 37.208950] pci :01:00.2: Adding to iommu group 25 [ 37.210660] pci :01:00.2: DMAR: domain->type is dma It bails at the dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN check at the beginning of intel_iommu_map. I will verify, but it looks like that is getting set when intel_iommu_add_device is called for 01:00.1. request_default_domain_for_dev for 01:00.1 will return -EBUSY because iommu_group_device_count(group) != 1. 
Also fails for 01:00.4: [ 37.212448] pci :01:00.4: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 37.215382] pci :01:00.4: iommu_group_create_direct_mappings: iterating through mappings [ 37.218170] pci :01:00.4: iommu_group_create_direct_mappings: calling apply_resv_region [ 37.220933] pci :01:00.4: iommu_group_create_direct_mappings: entry type is direct-relaxable [ 37.223932] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.226857] pci :01:00.4: iommu_group_create_direct_mappings: iommu_map failed [ 37.229300] pci :01:00.4: iommu_group_create_direct_mappings: leaving func [ 37.231648] pci :01:00.4: iommu_group_add_device: calling __iommu_attach_device [ 37.234194] pci :01:00.4: Adding to iommu group 25 [ 37.236192] pci :01:00.4: DMAR: domain->type is dma [ 37.237958] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain [ 37.241061] pci :01:00.4: don't change mappings of existing d37.489870] pci :01:00.4: DMAR: Device uses a private identity domain. 
There is an RMRR for 0xbddde000-0xbdddefff: [63Ah 1594 2]Subtable Type : 0001 [Reserved Memory Region] [63Ch 1596 2] Length : 0036 [63Eh 1598 2] Reserved : [640h 1600 2] PCI Segment Number : [642h 1602 8] Base Address : BDDDE000 [64Ah 1610 8] End Address (limit) : BDDDEFFF [652h 1618 1]Device Scope Type : 01 [PCI Endpoint Device] [653h 1619 1] Entry Length : 0A [654h 1620 2] Reserved : [656h 1622 1] Enumeration ID : 00 [657h 1623 1] PCI Bus Number : 00 [658h 1624 2] PCI Path : 1C,07 [65Ah 1626 2] PCI Path : 00,00 [65Ch 1628 1]Device Scope Type : 01 [PCI Endpoint Device] [65Dh 1629 1] Entry Length : 0A [65Eh 1630 2] Reserved : [660h 1632 1] Enumeration ID : 00 [661h 1633 1] PCI Bus Number : 00 [662h 1634 2] PCI Path : 1C,07 [664h 1636 2] PCI Path : 00,02 [666h 1638 1]Device Scope Type : 01 [PCI Endpoint Device] [667h 1639 1] Entry Length : 0A [668h 1640 2] Reserved : [66Ah 1642 1] Enumeration ID : 00 [66Bh 1643 1] PCI Bus Number : 00 [66Ch 1644 2] PCI Path : 1C,07 [66Eh 1646 2] PCI Path : 00,04 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Mon Dec 09 19, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: [snip] A call to iommu_map is failing. [ 36.686881] pci :01:00.2: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 36.689843] pci :01:00.2: iommu_group_create_direct_mappings: iterating through mappings [ 36.692757] pci :01:00.2: iommu_group_create_direct_mappings: calling apply_resv_region [ 36.695526] pci :01:00.2: e_direct_mappings: entry type is direct [ 37.198053] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.201357] pci :01:00.2: iommu_group_create_direct_mappings: iommu_map failed [ 37.203973] pci :01:00.2: iommu_group_create_direct_mappings: leaving func [ 37.206385] pci :01:00.2: iommu_group_add_device: calling __iommu_attach_device [ 37.208950] pci :01:00.2: Adding to iommu group 25 [ 37.210660] pci :01:00.2: DMAR: domain->type is dma It bails at the dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN check at the beginning of intel_iommu_map. I will verify, but it looks like that is getting set when intel_iommu_add_device is called for 01:00.1. request_default_domain_for_dev for 01:00.1 will return -EBUSY because iommu_group_device_count(group) != 1. Also I see 01:00.0 and others that are the first in a group exiting iommu_group_create_direct_mappings at the (!domain || domain->type != IOMMU_DOMAIN_DMA) check. In request_default_domain_for_dev default_domain doesn't getting set until after that call. Should the iommu_group_create_direct_mappings call be moved below where group->default_domain gets set? Doing this the system boots, and I don't get any dmar pte read errors. I still see the map failing because of the DOMAIN_FLAG_LOSE_CHILDREN in those cases mentioned above, but it no longer is spitting out tons of dmar pte read errors. 
Also fails for 01:00.4: [ 37.212448] pci :01:00.4: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 37.215382] pci :01:00.4: iommu_group_create_direct_mappings: iterating through mappings [ 37.218170] pci :01:00.4: iommu_group_create_direct_mappings: calling apply_resv_region [ 37.220933] pci :01:00.4: iommu_group_create_direct_mappings: entry type is direct-relaxable [ 37.223932] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.226857] pci :01:00.4: iommu_group_create_direct_mappings: iommu_map failed [ 37.229300] pci :01:00.4: iommu_group_create_direct_mappings: leaving func [ 37.231648] pci :01:00.4: iommu_group_add_device: calling __iommu_attach_device [ 37.234194] pci :01:00.4: Adding to iommu group 25 [ 37.236192] pci :01:00.4: DMAR: domain->type is dma [ 37.237958] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain [ 37.241061] pci :01:00.4: don't change mappings of existing d37.489870] pci :01:00.4: DMAR: Device uses a private identity domain. 
There is an RMRR for 0xbddde000-0xefff: [63Ah 1594 2]Subtable Type : 0001 [Reserved Memory Region] [63Ch 1596 2] Length : 0036 [63Eh 1598 2] Reserved : [640h 1600 2] PCI Segment Number : [642h 1602 8] Base Address : BDDDE000 [64Ah 1610 8] End Address (limit) : BDDDEFFF [652h 1618 1]Device Scope Type : 01 [PCI Endpoint Device] [653h 1619 1] Entry Length : 0A [654h 1620 2] Reserved : [656h 1622 1] Enumeration ID : 00 [657h 1623 1] PCI Bus Number : 00 [658h 1624 2] PCI Path : 1C,07 [65Ah 1626 2] PCI Path : 00,00 [65Ch 1628 1]Device Scope Type : 01 [PCI Endpoint Device] [65Dh 1629 1] Entry Length : 0A [65Eh 1630 2] Reserved : [660h 1632 1] Enumeration ID : 00 [661h 1633 1] PCI Bus Number : 00 [662h 1634 2] PCI Path : 1C,07 [664h 1636 2] PCI Path : 00,02 [666h 1638 1]Device Scope Type : 01 [PCI Endpoint Device] [667h 1639 1] Entry Length : 0A [668h 1640 2] Reserved : [66Ah 1642 1] Enumeration ID : 00 [66Bh 1643 1] PCI Bus Number : 00 [66Ch 1644 2] PCI Path : 1C,07 [66Eh 1646 2] PCI Path : 00,04 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar pte read access not set error messages on hp dl388 gen8 systems
On Tue Dec 10 19, Lu Baolu wrote: Hi, On 12/10/19 2:16 PM, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: On Mon Dec 09 19, Jerry Snitselaar wrote: [snip] A call to iommu_map is failing. [ 36.686881] pci :01:00.2: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 36.689843] pci :01:00.2: iommu_group_create_direct_mappings: iterating through mappings [ 36.692757] pci :01:00.2: iommu_group_create_direct_mappings: calling apply_resv_region [ 36.695526] pci :01:00.2: e_direct_mappings: entry type is direct [ 37.198053] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.201357] pci :01:00.2: iommu_group_create_direct_mappings: iommu_map failed [ 37.203973] pci :01:00.2: iommu_group_create_direct_mappings: leaving func [ 37.206385] pci :01:00.2: iommu_group_add_device: calling __iommu_attach_device [ 37.208950] pci :01:00.2: Adding to iommu group 25 [ 37.210660] pci :01:00.2: DMAR: domain->type is dma It bails at the dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN check at the beginning of intel_iommu_map. I will verify, but it looks like that is getting set when intel_iommu_add_device is called for 01:00.1. request_default_domain_for_dev for 01:00.1 will return -EBUSY because iommu_group_device_count(group) != 1. Also I see 01:00.0 and others that are the first in a group exiting iommu_group_create_direct_mappings at the (!domain || domain->type != IOMMU_DOMAIN_DMA) check. In request_default_domain_for_dev default_domain doesn't getting set until after that call. Should the iommu_group_create_direct_mappings call be moved below where group->default_domain gets set? Doing this the system boots, and I don't get any dmar pte read errors. I still see the map failing because of the DOMAIN_FLAG_LOSE_CHILDREN in those cases mentioned above, but it no longer is spitting out tons of dmar pte read errors. You can post a patch if you think this is worth of. 
Best regards, baolu I will send a patch tomorrow. In the case where you have default passthrough enabled, if the default domain type for the first device in a group is dma the call will fail, because iommu_group_create_direct_mappings uses group->default_domain and that will have an identity type until group->default_domain gets set right after the iommu_group_create_direct_mappings call. Regards, Jerry Also fails for 01:00.4: [ 37.212448] pci :01:00.4: iommu_group_add_device: calling iommu_group_create_direct_mappings [ 37.215382] pci :01:00.4: iommu_group_create_direct_mappings: iterating through mappings [ 37.218170] pci :01:00.4: iommu_group_create_direct_mappings: calling apply_resv_region [ 37.220933] pci :01:00.4: iommu_group_create_direct_mappings: entry type is direct-relaxable [ 37.223932] iommu: iommu_map: ops->map failed iova 0xbddde000 pa 0xbddde000 pgsize 0x1000 [ 37.226857] pci :01:00.4: iommu_group_create_direct_mappings: iommu_map failed [ 37.229300] pci :01:00.4: iommu_group_create_direct_mappings: leaving func [ 37.231648] pci :01:00.4: iommu_group_add_device: calling __iommu_attach_device [ 37.234194] pci :01:00.4: Adding to iommu group 25 [ 37.236192] pci :01:00.4: DMAR: domain->type is dma [ 37.237958] pci :01:00.4: DMAR: device default domain type is identity. requesting identity domain [ 37.241061] pci :01:00.4: don't change mappings of existing d37.489870] pci :01:00.4: DMAR: Device uses a private identity domain. 
There is an RMRR for 0xbddde000-0xefff: [63Ah 1594 2] Subtable Type : 0001 [Reserved Memory Region] [63Ch 1596 2] Length : 0036 [63Eh 1598 2] Reserved : [640h 1600 2] PCI Segment Number : [642h 1602 8] Base Address : BDDDE000 [64Ah 1610 8] End Address (limit) : BDDDEFFF [652h 1618 1] Device Scope Type : 01 [PCI Endpoint Device] [653h 1619 1] Entry Length : 0A [654h 1620 2] Reserved : [656h 1622 1] Enumeration ID : 00 [657h 1623 1] PCI Bus Number : 00 [658h 1624 2] PCI Path : 1C,07 [65Ah 1626 2] PCI Path : 00,00 [65Ch 1628 1] Device Scope Type : 01 [PCI Endpoint Device] [65Dh 1629 1] Entry Length : 0A [65Eh 1630 2] Reserved : [660h 1632 1] Enumeration ID : 00 [661h 1633 1] PCI Bus Number : 00 [662h 1634 2] PCI Path : 1C,07 [664h 1636 2] PCI Path : 00,02 [666h 1638 1] Device Scope Type : 01 [PCI Endpoint Device] [667h 1639 1] Entry Length : 0A [668h 1640
warning from domain_get_iommu
I'm working on getting a system to reproduce this, and verify it also occurs with 5.5, but I have a report of a case where the kdump kernel gives warnings like the following on a hp dl360 gen9: [2.830589] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [2.832615] ehci-pci: EHCI PCI platform driver [2.834190] ehci-pci :00:1a.0: EHCI Host Controller [2.835974] ehci-pci :00:1a.0: new USB bus registered, assigned bus number 1 [2.838276] ehci-pci :00:1a.0: debug port 2 [2.839700] WARNING: CPU: 0 PID: 1 at drivers/iommu/intel-iommu.c:598 domain_get_iommu+0x55/0x60 [2.840671] Modules linked in: [2.840671] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-170.el8.kdump2.x86_64 #1 [2.840671] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [2.840671] RIP: 0010:domain_get_iommu+0x55/0x60 [2.840671] Code: c2 01 eb 0b 48 83 c0 01 8b 34 87 85 f6 75 0b 48 63 c8 48 39 c2 75 ed 31 c0 c3 48 c1 e1 03 48 8b 05 70 f3 91 01 48 8b 04 08 c3 <0f> 0b 31 c0 c3 31 c9 eb eb 66 90 0f 1f 44 00 00 41 55 40 0f b6 f6 [2.840671] RSP: 0018:c90dfab8 EFLAGS: 00010202 [2.840671] RAX: 88ec7f1c8000 RBX: 006c7c867000 RCX: [2.840671] RDX: fff0 RSI: RDI: 88ec7f1c8000 [2.840671] RBP: 88ec6f7000b0 R08: 88ec7f19d000 R09: 88ec7cbfcd00 [2.840671] R10: 0095 R11: c90df928 R12: [2.840671] R13: 88ec7f1c8000 R14: 1000 R15: [2.840671] FS: () GS:88ec7f60() knlGS: [2.840671] CS: 0010 DS: ES: CR0: 80050033 [2.840671] CR2: 7ff3e1713000 CR3: 006c7de0a004 CR4: 001606b0 [2.840671] Call Trace: [2.840671] __intel_map_single+0x62/0x140 [2.840671] intel_alloc_coherent+0xa6/0x130 [2.840671] dma_pool_alloc+0xd8/0x1e0 [2.840671] e_qh_alloc+0x55/0x130 [2.840671] ehci_setup+0x284/0x7b0 [2.840671] ehci_pci_setup+0xa3/0x530 [2.840671] usb_add_hcd+0x2b6/0x800 [2.840671] usb_hcd_pci_probe+0x375/0x460 [2.840671] local_pci_probe+0x41/0x90 [2.840671] pci_device_probe+0x105/0x1b0 [2.840671] driver_probe_device+0x12d/0x460 [2.840671] device_driver_attach+0x50/0x60 [2.840671] __driver_attach+0x61/0x130 
[2.840671] ? device_driver_attach+0x60/0x60 [2.840671] bus_for_each_dev+0x77/0xc0 [2.840671] ? klist_add_tail+0x3b/0x70 [2.840671] bus_add_driver+0x14d/0x1e0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] ? do_early_param+0x91/0x91 [2.840671] driver_register+0x6b/0xb0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] do_one_initcall+0x46/0x1c3 [2.840671] ? do_early_param+0x91/0x91 [2.840671] kernel_init_freeable+0x1af/0x258 [2.840671] ? rest_init+0xaa/0xaa [2.840671] kernel_init+0xa/0xf9 [2.840671] ret_from_fork+0x35/0x40 [2.840671] ---[ end trace e87b0d9a1c8135c4 ]--- [3.010848] ehci-pci :00:1a.0: Using iommu dma mapping [3.012551] ehci-pci :00:1a.0: 32bit DMA uses non-identity mapping [3.018537] ehci-pci :00:1a.0: cache line size of 64 is not supported [3.021188] ehci-pci :00:1a.0: irq 18, io mem 0x93002000 [3.029006] ehci-pci :00:1a.0: USB 2.0 started, EHCI 1.00 [3.030918] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 4.18 [3.033491] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1 [3.035900] usb usb1: Product: EHCI Host Controller [3.037423] usb usb1: Manufacturer: Linux 4.18.0-170.el8.kdump2.x86_64 ehci_hcd [3.039691] usb usb1: SerialNumber: :00:1a.0 It looks like the device finishes initializing once it figures out it needs dma mapping instead of the default passthrough. intel_alloc_coherent calls iommu_need_mapping, before it calls __intel_map_single, so I'm not sure why it is tripping over the WARN_ON in domain_get_iommu. one thing I noticed while looking at this is that domain_get_iommu can return NULL. So should there be something like the following in __intel_map_single after the domain_get_iommu call? if (!iommu) goto error; It is possible to deref the null pointer later otherwise. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: Seeing another issue with mixed domains in the same iommu_group
On Thu Feb 06 20, Jerry Snitselaar wrote: Hi Baolu, I'm seeing another issue with the devices in the HP ilo when the system is booted with intel_iommu=on and iommu=pt (iommu=nopt does not run into problems). first system: 01:00.0 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support (rev 05) 01:00.1 VGA compatible controller: Matrox Electronics Systems Ltd. MGA G200EH 01:00.2 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging (rev 05) 01:00.4 USB controller: Hewlett-Packard Company Integrated Lights-Out Standard Virtual USB Controller (rev 02) [ 21.208103] pci :01:00.0: Adding to iommu group 24 [ 21.210911] pci :01:00.0: Using iommu dma mapping [ 21.212635] pci :01:00.1: Adding to iommu group 24 [ 21.214326] pci :01:00.1: Device uses a private identity domain. [ 21.216507] pci :01:00.2: Adding to iommu group 24 [ 21.618173] pci :01:00.4: Adding to iommu group 24 [ 21.619839] pci :01:00.4: Device uses a private identity domain. [ 26.206832] uhci_hcd: USB Universal Host Controller Interface driver [ 26.209044] uhci_hcd :01:00.4: UHCI Host Controller [ 26.210897] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 26.213247] uhci_hcd :01:00.4: detected 8 ports [ 26.214810] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 26.217153] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 26.219171] uhci_hcd :01:00.4: 32bit DMA uses non-identity mapping [ 26.221261] uhci_hcd :01:00.4: unable to allocate consistent memory for frame list [ 26.223787] uhci_hcd :01:00.4: startup error -16 [ 26.225381] uhci_hcd :01:00.4: USB bus 3 deregistered [ 26.227378] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 26.229296] uhci_hcd: probe of :01:00.4 failed with error -16 different system with similar issue: 01:00.0 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support [103c:3306] (rev 07) 01:00.1 VGA compatible controller [0300]: Matrox Electronics Systems Ltd. MGA G200eH3 [102b:0538] (rev 02) (prog-if 00 [VGA controller]) 01:00.2 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging [103c:3307] (rev 07) 01:00.4 USB controller [0c03]: Hewlett-Packard Company iLO5 Virtual USB Controller [103c:22f6] (prog-if 20 [EHCI]) [ 13.695663] pci :01:00.0: Adding to iommu group 10 [ 13.703667] pci :01:00.0: Using iommu dma mapping [ 13.708871] pci :01:00.1: Adding to iommu group 10 [ 13.714033] pci :01:00.1: DMAR: Device uses a private identity domain. [ 13.721033] pci :01:00.2: Adding to iommu group 10 [ 13.726290] pci :01:00.4: Adding to iommu group 10 [ 13.731453] pci :01:00.4: DMAR: Device uses a private identity domain. 
[ 17.157796] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [ 17.164348] ehci-pci: EHCI PCI platform driver [ 17.170061] ehci-pci :01:00.4: EHCI Host Controller [ 17.175457] ehci-pci :01:00.4: new USB bus registered, assigned bus number 1 [ 17.182912] ehci-pci :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 17.189988] ehci-pci :01:00.4: can't setup: -12 [ 17.194884] ehci-pci :01:00.4: USB bus 1 deregistered [ 17.200567] ehci-pci :01:00.4: init :01:00.4 fail, -12 [ 17.206508] ehci-pci: probe of :01:00.4 failed with error -12 I'm looking through the code and trying to debug it, but any thoughts on this? Regards, Jerry In iommu_need_mapping, in a case like the above does something like dmar_insert_one_dev_info need to happen to associate the device back with the group default domain? In intel_iommu_add_device it is going to get removed and added to the identity domain, and then in iommu_need_mapping it gets removed from the identity domain, and iommu_request_dma_domain_for_dev should return 0 because the group default domain at this point is the correct type. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: warning from domain_get_iommu
On Tue Feb 04 20, Jerry Snitselaar wrote: I'm working on getting a system to reproduce this, and verify it also occurs with 5.5, but I have a report of a case where the kdump kernel gives warnings like the following on a hp dl360 gen9: [2.830589] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [2.832615] ehci-pci: EHCI PCI platform driver [2.834190] ehci-pci :00:1a.0: EHCI Host Controller [2.835974] ehci-pci :00:1a.0: new USB bus registered, assigned bus number 1 [2.838276] ehci-pci :00:1a.0: debug port 2 [2.839700] WARNING: CPU: 0 PID: 1 at drivers/iommu/intel-iommu.c:598 domain_get_iommu+0x55/0x60 [2.840671] Modules linked in: [2.840671] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-170.el8.kdump2.x86_64 #1 [2.840671] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [2.840671] RIP: 0010:domain_get_iommu+0x55/0x60 [2.840671] Code: c2 01 eb 0b 48 83 c0 01 8b 34 87 85 f6 75 0b 48 63 c8 48 39 c2 75 ed 31 c0 c3 48 c1 e1 03 48 8b 05 70 f3 91 01 48 8b 04 08 c3 <0f> 0b 31 c0 c3 31 c9 eb eb 66 90 0f 1f 44 00 00 41 55 40 0f b6 f6 [2.840671] RSP: 0018:c90dfab8 EFLAGS: 00010202 [2.840671] RAX: 88ec7f1c8000 RBX: 006c7c867000 RCX: [2.840671] RDX: fff0 RSI: RDI: 88ec7f1c8000 [2.840671] RBP: 88ec6f7000b0 R08: 88ec7f19d000 R09: 88ec7cbfcd00 [2.840671] R10: 0095 R11: c90df928 R12: [2.840671] R13: 88ec7f1c8000 R14: 1000 R15: [2.840671] FS: () GS:88ec7f60() knlGS: [2.840671] CS: 0010 DS: ES: CR0: 80050033 [2.840671] CR2: 7ff3e1713000 CR3: 006c7de0a004 CR4: 001606b0 [2.840671] Call Trace: [2.840671] __intel_map_single+0x62/0x140 [2.840671] intel_alloc_coherent+0xa6/0x130 [2.840671] dma_pool_alloc+0xd8/0x1e0 [2.840671] e_qh_alloc+0x55/0x130 [2.840671] ehci_setup+0x284/0x7b0 [2.840671] ehci_pci_setup+0xa3/0x530 [2.840671] usb_add_hcd+0x2b6/0x800 [2.840671] usb_hcd_pci_probe+0x375/0x460 [2.840671] local_pci_probe+0x41/0x90 [2.840671] pci_device_probe+0x105/0x1b0 [2.840671] driver_probe_device+0x12d/0x460 [2.840671] 
device_driver_attach+0x50/0x60 [2.840671] __driver_attach+0x61/0x130 [2.840671] ? device_driver_attach+0x60/0x60 [2.840671] bus_for_each_dev+0x77/0xc0 [2.840671] ? klist_add_tail+0x3b/0x70 [2.840671] bus_add_driver+0x14d/0x1e0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] ? do_early_param+0x91/0x91 [2.840671] driver_register+0x6b/0xb0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] do_one_initcall+0x46/0x1c3 [2.840671] ? do_early_param+0x91/0x91 [2.840671] kernel_init_freeable+0x1af/0x258 [2.840671] ? rest_init+0xaa/0xaa [2.840671] kernel_init+0xa/0xf9 [2.840671] ret_from_fork+0x35/0x40 [2.840671] ---[ end trace e87b0d9a1c8135c4 ]--- [3.010848] ehci-pci :00:1a.0: Using iommu dma mapping [3.012551] ehci-pci :00:1a.0: 32bit DMA uses non-identity mapping [3.018537] ehci-pci :00:1a.0: cache line size of 64 is not supported [3.021188] ehci-pci :00:1a.0: irq 18, io mem 0x93002000 [3.029006] ehci-pci :00:1a.0: USB 2.0 started, EHCI 1.00 [3.030918] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 4.18 [3.033491] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1 [3.035900] usb usb1: Product: EHCI Host Controller [3.037423] usb usb1: Manufacturer: Linux 4.18.0-170.el8.kdump2.x86_64 ehci_hcd [3.039691] usb usb1: SerialNumber: :00:1a.0 It looks like the device finishes initializing once it figures out it needs dma mapping instead of the default passthrough. intel_alloc_coherent calls iommu_need_mapping, before it calls __intel_map_single, so I'm not sure why it is tripping over the WARN_ON in domain_get_iommu. one thing I noticed while looking at this is that domain_get_iommu can return NULL. So should there be something like the following in __intel_map_single after the domain_get_iommu call? if (!iommu) goto error; It is possible to deref the null pointer later otherwise. Regards, Jerry I reproduced the warning with a 5.5 kernel on an Intel NUC5i5MYBE. 
___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Seeing another issue with mixed domains in the same iommu_group
Hi Baolu, I'm seeing another issue with the devices in the HP ilo when the system is booted with intel_iommu=on and iommu=pt (iommu=nopt does not run into problems). first system: 01:00.0 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support (rev 05) 01:00.1 VGA compatible controller: Matrox Electronics Systems Ltd. MGA G200EH 01:00.2 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging (rev 05) 01:00.4 USB controller: Hewlett-Packard Company Integrated Lights-Out Standard Virtual USB Controller (rev 02) [ 21.208103] pci :01:00.0: Adding to iommu group 24 [ 21.210911] pci :01:00.0: Using iommu dma mapping [ 21.212635] pci :01:00.1: Adding to iommu group 24 [ 21.214326] pci :01:00.1: Device uses a private identity domain. [ 21.216507] pci :01:00.2: Adding to iommu group 24 [ 21.618173] pci :01:00.4: Adding to iommu group 24 [ 21.619839] pci :01:00.4: Device uses a private identity domain. [ 26.206832] uhci_hcd: USB Universal Host Controller Interface driver [ 26.209044] uhci_hcd :01:00.4: UHCI Host Controller [ 26.210897] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 26.213247] uhci_hcd :01:00.4: detected 8 ports [ 26.214810] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 26.217153] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 26.219171] uhci_hcd :01:00.4: 32bit DMA uses non-identity mapping [ 26.221261] uhci_hcd :01:00.4: unable to allocate consistent memory for frame list [ 26.223787] uhci_hcd :01:00.4: startup error -16 [ 26.225381] uhci_hcd :01:00.4: USB bus 3 deregistered [ 26.227378] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 26.229296] uhci_hcd: probe of :01:00.4 failed with error -16 different system with similar issue: 01:00.0 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support [103c:3306] (rev 07) 01:00.1 VGA compatible controller [0300]: Matrox Electronics Systems Ltd. MGA G200eH3 [102b:0538] (rev 02) (prog-if 00 [VGA controller]) 01:00.2 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging [103c:3307] (rev 07) 01:00.4 USB controller [0c03]: Hewlett-Packard Company iLO5 Virtual USB Controller [103c:22f6] (prog-if 20 [EHCI]) [ 13.695663] pci :01:00.0: Adding to iommu group 10 [ 13.703667] pci :01:00.0: Using iommu dma mapping [ 13.708871] pci :01:00.1: Adding to iommu group 10 [ 13.714033] pci :01:00.1: DMAR: Device uses a private identity domain. [ 13.721033] pci :01:00.2: Adding to iommu group 10 [ 13.726290] pci :01:00.4: Adding to iommu group 10 [ 13.731453] pci :01:00.4: DMAR: Device uses a private identity domain. 
[ 17.157796] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [ 17.164348] ehci-pci: EHCI PCI platform driver [ 17.170061] ehci-pci :01:00.4: EHCI Host Controller [ 17.175457] ehci-pci :01:00.4: new USB bus registered, assigned bus number 1 [ 17.182912] ehci-pci :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 17.189988] ehci-pci :01:00.4: can't setup: -12 [ 17.194884] ehci-pci :01:00.4: USB bus 1 deregistered [ 17.200567] ehci-pci :01:00.4: init :01:00.4 fail, -12 [ 17.206508] ehci-pci: probe of :01:00.4 failed with error -12 I'm looking through the code and trying to debug it, but any thoughts on this? Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: Seeing another issue with mixed domains in the same iommu_group
On Thu Feb 06 20, Jerry Snitselaar wrote: ... The above cases seem to be avoided by: 9235cb13d7d1 | 2020-01-24 | iommu/vt-d: Allow devices with RMRRs to use identity domain (Lu Baolu) which results in the watchdog device no longer taking a dma domain and switching the group default. Without that patch though when it gets into the iommu_need_mapping code for :01:00.4 after the following: dmar_remove_one_dev_info(dev); ret = iommu_request_dma_domain_for_dev(dev); ret is 0 and dev->archdata.iommu is NULL. Even with 9235cb13d7d1 device_def_domain_type can return dma, but I'm not sure how likely it is for there to be an iommu group like that again where the group default ends up dma, a device gets removed and added to the identity domain, and then ends up in that code in iommu_need_mapping. Hi Baolu, Would something along these lines make sense? diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9dc37672bf89..40cc8f5a3ebb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3614,6 +3614,20 @@ static bool iommu_need_mapping(struct device *dev) } dmar_remove_one_dev_info(dev); get_private_domain_for_dev(dev); + } else { + if (dev->archdata.iommu == NULL) { + struct iommu_domain *domain; + struct iommu_group *group; + struct dmar_domain *dmar_domain, *tmp; + + group = iommu_group_get_for_dev(dev); + domain = iommu_group_default_domain(group); + dmar_domain = to_dmar_domain(domain); + tmp = set_domain_for_dev(dev, dmar_domain); + } } dev_info(dev, "32bit DMA uses non-identity mapping\n"); -- Obviously needs some checks added, but this was just an initial test I was trying. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: warning from domain_get_iommu
On Thu Feb 06 20, Jerry Snitselaar wrote: On Tue Feb 04 20, Jerry Snitselaar wrote: I'm working on getting a system to reproduce this, and verify it also occurs with 5.5, but I have a report of a case where the kdump kernel gives warnings like the following on a hp dl360 gen9: [2.830589] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [2.832615] ehci-pci: EHCI PCI platform driver [2.834190] ehci-pci :00:1a.0: EHCI Host Controller [2.835974] ehci-pci :00:1a.0: new USB bus registered, assigned bus number 1 [2.838276] ehci-pci :00:1a.0: debug port 2 [2.839700] WARNING: CPU: 0 PID: 1 at drivers/iommu/intel-iommu.c:598 domain_get_iommu+0x55/0x60 [2.840671] Modules linked in: [2.840671] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-170.el8.kdump2.x86_64 #1 [2.840671] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [2.840671] RIP: 0010:domain_get_iommu+0x55/0x60 [2.840671] Code: c2 01 eb 0b 48 83 c0 01 8b 34 87 85 f6 75 0b 48 63 c8 48 39 c2 75 ed 31 c0 c3 48 c1 e1 03 48 8b 05 70 f3 91 01 48 8b 04 08 c3 <0f> 0b 31 c0 c3 31 c9 eb eb 66 90 0f 1f 44 00 00 41 55 40 0f b6 f6 [2.840671] RSP: 0018:c90dfab8 EFLAGS: 00010202 [2.840671] RAX: 88ec7f1c8000 RBX: 006c7c867000 RCX: [2.840671] RDX: fff0 RSI: RDI: 88ec7f1c8000 [2.840671] RBP: 88ec6f7000b0 R08: 88ec7f19d000 R09: 88ec7cbfcd00 [2.840671] R10: 0095 R11: c90df928 R12: [2.840671] R13: 88ec7f1c8000 R14: 1000 R15: [2.840671] FS: () GS:88ec7f60() knlGS: [2.840671] CS: 0010 DS: ES: CR0: 80050033 [2.840671] CR2: 7ff3e1713000 CR3: 006c7de0a004 CR4: 001606b0 [2.840671] Call Trace: [2.840671] __intel_map_single+0x62/0x140 [2.840671] intel_alloc_coherent+0xa6/0x130 [2.840671] dma_pool_alloc+0xd8/0x1e0 [2.840671] e_qh_alloc+0x55/0x130 [2.840671] ehci_setup+0x284/0x7b0 [2.840671] ehci_pci_setup+0xa3/0x530 [2.840671] usb_add_hcd+0x2b6/0x800 [2.840671] usb_hcd_pci_probe+0x375/0x460 [2.840671] local_pci_probe+0x41/0x90 [2.840671] pci_device_probe+0x105/0x1b0 [2.840671] 
driver_probe_device+0x12d/0x460 [2.840671] device_driver_attach+0x50/0x60 [2.840671] __driver_attach+0x61/0x130 [2.840671] ? device_driver_attach+0x60/0x60 [2.840671] bus_for_each_dev+0x77/0xc0 [2.840671] ? klist_add_tail+0x3b/0x70 [2.840671] bus_add_driver+0x14d/0x1e0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] ? do_early_param+0x91/0x91 [2.840671] driver_register+0x6b/0xb0 [2.840671] ? ehci_hcd_init+0xaa/0xaa [2.840671] do_one_initcall+0x46/0x1c3 [2.840671] ? do_early_param+0x91/0x91 [2.840671] kernel_init_freeable+0x1af/0x258 [2.840671] ? rest_init+0xaa/0xaa [2.840671] kernel_init+0xa/0xf9 [2.840671] ret_from_fork+0x35/0x40 [2.840671] ---[ end trace e87b0d9a1c8135c4 ]--- [3.010848] ehci-pci :00:1a.0: Using iommu dma mapping [3.012551] ehci-pci :00:1a.0: 32bit DMA uses non-identity mapping [3.018537] ehci-pci :00:1a.0: cache line size of 64 is not supported [3.021188] ehci-pci :00:1a.0: irq 18, io mem 0x93002000 [3.029006] ehci-pci :00:1a.0: USB 2.0 started, EHCI 1.00 [3.030918] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 4.18 [3.033491] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1 [3.035900] usb usb1: Product: EHCI Host Controller [3.037423] usb usb1: Manufacturer: Linux 4.18.0-170.el8.kdump2.x86_64 ehci_hcd [3.039691] usb usb1: SerialNumber: :00:1a.0 It looks like the device finishes initializing once it figures out it needs dma mapping instead of the default passthrough. intel_alloc_coherent calls iommu_need_mapping, before it calls __intel_map_single, so I'm not sure why it is tripping over the WARN_ON in domain_get_iommu. one thing I noticed while looking at this is that domain_get_iommu can return NULL. So should there be something like the following in __intel_map_single after the domain_get_iommu call? if (!iommu) goto error; It is possible to deref the null pointer later otherwise. Regards, Jerry I reproduced the warning with a 5.5 kernel on an Intel NUC5i5MYBE. 
Hi Baolu, I think I understand what is happening here. With the kdump boot translation is pre-enabled, so in intel_iommu_add_device things are getting set to DEFER_DEVICE_DOMAIN_INFO. When intel_alloc_coherent calls iommu_need_mapping it returns true, but doesn't do the dma domain switch because of DEFER_DEVICE_DOMAIN_INFO. Then __intel_map_single gets called and it calls deferred_attach_domain, which sets the domain to the group domain, which in this case is the identity domain. Then it
Re: Seeing another issue with mixed domains in the same iommu_group
On Thu Feb 06 20, Jerry Snitselaar wrote: On Thu Feb 06 20, Jerry Snitselaar wrote: Hi Baolu, I'm seeing another issue with the devices in the HP ilo when the system is booted with intel_iommu=on and iommu=pt (iommu=nopt does not run into problems). first system: 01:00.0 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support (rev 05) 01:00.1 VGA compatible controller: Matrox Electronics Systems Ltd. MGA G200EH 01:00.2 System peripheral: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging (rev 05) 01:00.4 USB controller: Hewlett-Packard Company Integrated Lights-Out Standard Virtual USB Controller (rev 02) [ 21.208103] pci :01:00.0: Adding to iommu group 24 [ 21.210911] pci :01:00.0: Using iommu dma mapping [ 21.212635] pci :01:00.1: Adding to iommu group 24 [ 21.214326] pci :01:00.1: Device uses a private identity domain. [ 21.216507] pci :01:00.2: Adding to iommu group 24 [ 21.618173] pci :01:00.4: Adding to iommu group 24 [ 21.619839] pci :01:00.4: Device uses a private identity domain. [ 26.206832] uhci_hcd: USB Universal Host Controller Interface driver [ 26.209044] uhci_hcd :01:00.4: UHCI Host Controller [ 26.210897] uhci_hcd :01:00.4: new USB bus registered, assigned bus number 3 [ 26.213247] uhci_hcd :01:00.4: detected 8 ports [ 26.214810] uhci_hcd :01:00.4: port count misdetected? 
forcing to 2 ports [ 26.217153] uhci_hcd :01:00.4: irq 16, io base 0x3c00 [ 26.219171] uhci_hcd :01:00.4: 32bit DMA uses non-identity mapping [ 26.221261] uhci_hcd :01:00.4: unable to allocate consistent memory for frame list [ 26.223787] uhci_hcd :01:00.4: startup error -16 [ 26.225381] uhci_hcd :01:00.4: USB bus 3 deregistered [ 26.227378] uhci_hcd :01:00.4: init :01:00.4 fail, -16 [ 26.229296] uhci_hcd: probe of :01:00.4 failed with error -16 different system with similar issue: 01:00.0 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Slave Instrumentation & System Support [103c:3306] (rev 07) 01:00.1 VGA compatible controller [0300]: Matrox Electronics Systems Ltd. MGA G200eH3 [102b:0538] (rev 02) (prog-if 00 [VGA controller]) 01:00.2 System peripheral [0880]: Hewlett-Packard Company Integrated Lights-Out Standard Management Processor Support and Messaging [103c:3307] (rev 07) 01:00.4 USB controller [0c03]: Hewlett-Packard Company iLO5 Virtual USB Controller [103c:22f6] (prog-if 20 [EHCI]) [ 13.695663] pci :01:00.0: Adding to iommu group 10 [ 13.703667] pci :01:00.0: Using iommu dma mapping [ 13.708871] pci :01:00.1: Adding to iommu group 10 [ 13.714033] pci :01:00.1: DMAR: Device uses a private identity domain. [ 13.721033] pci :01:00.2: Adding to iommu group 10 [ 13.726290] pci :01:00.4: Adding to iommu group 10 [ 13.731453] pci :01:00.4: DMAR: Device uses a private identity domain. 
[ 17.157796] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [ 17.164348] ehci-pci: EHCI PCI platform driver [ 17.170061] ehci-pci :01:00.4: EHCI Host Controller [ 17.175457] ehci-pci :01:00.4: new USB bus registered, assigned bus number 1 [ 17.182912] ehci-pci :01:00.4: DMAR: 32bit DMA uses non-identity mapping [ 17.189988] ehci-pci :01:00.4: can't setup: -12 [ 17.194884] ehci-pci :01:00.4: USB bus 1 deregistered [ 17.200567] ehci-pci :01:00.4: init :01:00.4 fail, -12 [ 17.206508] ehci-pci: probe of :01:00.4 failed with error -12 I'm looking through the code and trying to debug it, but any thoughts on this? Regards, Jerry In iommu_need_mapping, in a case like the above does something like dmar_insert_one_dev_info need to happen to associate the device back with the group default domain? In intel_iommu_add_device it is going to get removed and added to the identity domain, and then in iommu_need_mapping it gets removed from the identity domain, and iommu_request_dma_domain_for_dev should return 0 because the group default domain at this point is the correct type. The above cases seem to be avoided by: 9235cb13d7d1 | 2020-01-24 | iommu/vt-d: Allow devices with RMRRs to use identity domain (Lu Baolu) which results in the watchdog device no longer taking a dma domain and switching the group default. Without that patch though when it gets into the iommu_need_mapping code for :01:00.4 after the following: dmar_remove_one_dev_info(dev); ret = iommu_request_dma_domain_for_dev(dev); ret is 0 and dev->archdata.iommu is NULL. Even with 9235cb13d7d1 device_def_domain_type can return return dma, but I'm not sure how likely it is for there to be an iommu group like that again where the group default ends up dma, a device gets removed and added to the identity domain, and then ends up in that code in iommu_n
Re: warning from domain_get_iommu
On Sat Feb 08 20, Lu Baolu wrote: Hi Jerry, On 2020/2/7 17:34, Jerry Snitselaar wrote: On Thu Feb 06 20, Jerry Snitselaar wrote: On Tue Feb 04 20, Jerry Snitselaar wrote: I'm working on getting a system to reproduce this, and verify it also occurs with 5.5, but I have a report of a case where the kdump kernel gives warnings like the following on a hp dl360 gen9: [ 2.830589] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver [ 2.832615] ehci-pci: EHCI PCI platform driver [ 2.834190] ehci-pci :00:1a.0: EHCI Host Controller [ 2.835974] ehci-pci :00:1a.0: new USB bus registered, assigned bus number 1 [ 2.838276] ehci-pci :00:1a.0: debug port 2 [ 2.839700] WARNING: CPU: 0 PID: 1 at drivers/iommu/intel-iommu.c:598 domain_get_iommu+0x55/0x60 [ 2.840671] Modules linked in: [ 2.840671] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-170.el8.kdump2.x86_64 #1 [ 2.840671] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 07/21/2019 [ 2.840671] RIP: 0010:domain_get_iommu+0x55/0x60 [ 2.840671] Code: c2 01 eb 0b 48 83 c0 01 8b 34 87 85 f6 75 0b 48 63 c8 48 39 c2 75 ed 31 c0 c3 48 c1 e1 03 48 8b 05 70 f3 91 01 48 8b 04 08 c3 <0f> 0b 31 c0 c3 31 c9 eb eb 66 90 0f 1f 44 00 00 41 55 40 0f b6 f6 [ 2.840671] RSP: 0018:c90dfab8 EFLAGS: 00010202 [ 2.840671] RAX: 88ec7f1c8000 RBX: 006c7c867000 RCX: [ 2.840671] RDX: fff0 RSI: RDI: 88ec7f1c8000 [ 2.840671] RBP: 88ec6f7000b0 R08: 88ec7f19d000 R09: 88ec7cbfcd00 [ 2.840671] R10: 0095 R11: c90df928 R12: [ 2.840671] R13: 88ec7f1c8000 R14: 1000 R15: [ 2.840671] FS: () GS:88ec7f60() knlGS: [ 2.840671] CS: 0010 DS: ES: CR0: 80050033 [ 2.840671] CR2: 7ff3e1713000 CR3: 006c7de0a004 CR4: 001606b0 [ 2.840671] Call Trace: [ 2.840671] __intel_map_single+0x62/0x140 [ 2.840671] intel_alloc_coherent+0xa6/0x130 [ 2.840671] dma_pool_alloc+0xd8/0x1e0 [ 2.840671] e_qh_alloc+0x55/0x130 [ 2.840671] ehci_setup+0x284/0x7b0 [ 2.840671] ehci_pci_setup+0xa3/0x530 [ 2.840671] usb_add_hcd+0x2b6/0x800 [ 2.840671] usb_hcd_pci_probe+0x375/0x460 [ 
2.840671] local_pci_probe+0x41/0x90 [ 2.840671] pci_device_probe+0x105/0x1b0 [ 2.840671] driver_probe_device+0x12d/0x460 [ 2.840671] device_driver_attach+0x50/0x60 [ 2.840671] __driver_attach+0x61/0x130 [ 2.840671] ? device_driver_attach+0x60/0x60 [ 2.840671] bus_for_each_dev+0x77/0xc0 [ 2.840671] ? klist_add_tail+0x3b/0x70 [ 2.840671] bus_add_driver+0x14d/0x1e0 [ 2.840671] ? ehci_hcd_init+0xaa/0xaa [ 2.840671] ? do_early_param+0x91/0x91 [ 2.840671] driver_register+0x6b/0xb0 [ 2.840671] ? ehci_hcd_init+0xaa/0xaa [ 2.840671] do_one_initcall+0x46/0x1c3 [ 2.840671] ? do_early_param+0x91/0x91 [ 2.840671] kernel_init_freeable+0x1af/0x258 [ 2.840671] ? rest_init+0xaa/0xaa [ 2.840671] kernel_init+0xa/0xf9 [ 2.840671] ret_from_fork+0x35/0x40 [ 2.840671] ---[ end trace e87b0d9a1c8135c4 ]--- [ 3.010848] ehci-pci :00:1a.0: Using iommu dma mapping [ 3.012551] ehci-pci :00:1a.0: 32bit DMA uses non-identity mapping [ 3.018537] ehci-pci :00:1a.0: cache line size of 64 is not supported [ 3.021188] ehci-pci :00:1a.0: irq 18, io mem 0x93002000 [ 3.029006] ehci-pci :00:1a.0: USB 2.0 started, EHCI 1.00 [ 3.030918] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 4.18 [ 3.033491] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1 [ 3.035900] usb usb1: Product: EHCI Host Controller [ 3.037423] usb usb1: Manufacturer: Linux 4.18.0-170.el8.kdump2.x86_64 ehci_hcd [ 3.039691] usb usb1: SerialNumber: :00:1a.0 It looks like the device finishes initializing once it figures out it needs dma mapping instead of the default passthrough. intel_alloc_coherent calls iommu_need_mapping, before it calls __intel_map_single, so I'm not sure why it is tripping over the WARN_ON in domain_get_iommu. one thing I noticed while looking at this is that domain_get_iommu can return NULL. So should there be something like the following in __intel_map_single after the domain_get_iommu call? if (!iommu) goto error; It is possible to deref the null pointer later otherwise. 
Regards, Jerry I reproduced the warning with a 5.5 kernel on an Intel NUC5i5MYBE. Hi Baolu, I think I understand what is happening here. With the kdump boot translation is pre-enabled, so in intel_iommu_add_device things are getting set to DEFER_DEVICE_DOMAIN_INFO. When intel_alloc_coherent calls iommu_need_mapping it returns true, but doesn't do the dma domain switch because of DEFER_DEVICE_DOMAIN_INFO. Then __intel_map_single gets called and it calls deferred_attach_
Re: arm-smmu.1.auto: Unhandled context fault starting with 5.4-rc1
On Mon Feb 17 20, Robin Murphy wrote: On 16/02/2020 10:11 pm, Jerry Snitselaar wrote: On Fri Feb 14 20, Robin Murphy wrote: Hi Jerry, On 2020-02-14 8:13 pm, Jerry Snitselaar wrote: Hi Will, On a gigabyte system with Cavium CN8xx, when doing a fio test against an nvme drive we are seeing the following: [ 637.161194] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003f6000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.174329] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80136000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.186887] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010002ee000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.199275] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003c7000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.211885] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000392000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.224580] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80118000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.237241] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80100036, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.249657] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801ba000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.262120] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8013e000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.274468] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000304000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 Those "IOVAs" don't look much like IOVAs from the DMA allocator - if they were physical addresses, would they correspond to an expected region of the physical memory map? I would suspect that this is most likely misbehaviour in the NVMe driver (issuing a write to a non-DMA-mapped address), and the SMMU is just doing its job in blocking and reporting it. 
I also reproduced with 5.5-rc7, and will check 5.6-rc1 later today. I couldn't narrow it down further into 5.4-rc1. I don't know smmu or the code well, any thoughts on where to start digging into this? fio test that is being run is: #fio -filename=/dev/nvme0n1 -iodepth=64 -thread -rw=randwrite -ioengine=libaio -bs=4k -runtime=43200 -size=-group_reporting -name=mytest -numjobs=32 Just to clarify, do other tests work OK on the same device? Thanks, Robin. I was able to get back on the system today. I think I know what the problem is: [ 0.036189] iommu: Gigabyte R120-T34-00 detected, force iommu passthrough mode [ 6.324282] iommu: Default domain type: Translated So the new default domain code in 5.4 overrides the iommu quirk code setting default passthrough. Testing a quick patch that tracks whether the default domain was set in the quirk code, and leaves it alone if it was. So far it seems to be working. Ah, OK. Could you point me at that quirk code? I can't seem to track it down in mainline, and seeing this much leaves me dubious that it's even correct - matching a particular board implies that it's a firmware issue (as far as I'm aware the SMMUs in CN88xx SoCs are usable in general), but if the firmware description is wrong to the point that DMA ops translation doesn't work, then no other translation (e.g. VFIO) is likely to work either. In that case it's simply not safe to enable the SMMU at all, and fudging the default domain type merely hides one symptom of the problem. Robin. Ugh. It is a RHEL only patch, but for some reason it is applied to the ark kernel builds as well. Sorry for the noise. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 5/5] iommu/vt-d: Simplify check in identity_mapping()
On Mon Feb 17 20, Joerg Roedel wrote: From: Joerg Roedel The function only has one call-site and there it is never called with dummy or deferred devices. Simplify the check in the function to account for that. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/5] iommu/vt-d: Add attach_deferred() helper
On Mon Feb 17 20, Joerg Roedel wrote: From: Joerg Roedel Implement a helper function to check whether a device's attach process is deferred. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/5] iommu/vt-d: Do deferred attachment in iommu_need_mapping()
On Mon Feb 17 20, Joerg Roedel wrote: From: Joerg Roedel The attachment of deferred devices needs to happen before the check whether the device is identity mapped or not. Otherwise the check will return wrong results, cause warnings and boot failures in kdump kernels, like WARNING: CPU: 0 PID: 318 at ../drivers/iommu/intel-iommu.c:592 domain_get_iommu+0x61/0x70 [...] Call Trace: __intel_map_single+0x55/0x190 intel_alloc_coherent+0xac/0x110 dmam_alloc_attrs+0x50/0xa0 ahci_port_start+0xfb/0x1f0 [libahci] ata_host_start.part.39+0x104/0x1e0 [libata] With the earlier check the kdump boot succeeds and a crashdump is written. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 2/5] iommu/vt-d: Move deferred device attachment into helper function
On Mon Feb 17 20, Joerg Roedel wrote: From: Joerg Roedel Move the code that does the deferred device attachment into a separate helper function. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 4/5] iommu/vt-d: Remove deferred_attach_domain()
On Mon Feb 17 20, Joerg Roedel wrote: From: Joerg Roedel The function is now only a wrapper around find_domain(). Remove the function and call find_domain() directly at the call-sites. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: dmar fault right around domain switch in iommu_need_mapping
On Wed Feb 19 20, Lu Baolu wrote: Hi Jerry, On 2020/2/18 23:45, Jerry Snitselaar wrote: Hi Joerg and Baolu, I'm chasing down one last issue. I'm waiting to hear back from them testing with Joerg's patchset, but I'm guessing this will still pop up. It looks like right around when the domain switch occurs in iommu_need_mapping there are some dmar faults (below is from 5.6-rc1 plus earlier fix attempt that moved deferred attach to beginning of iommu_need_mapping): [ 12.546920] DMAR: DRHD: handling fault status reg 2 [ 12.546923] DMAR: [DMA Read] Request device [02:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 12.635193] hpsa :02:00.0: Using iommu dma mapping [ 12.776712] hpsa :02:00.0: DMAR: 32bit DMA uses non-identity mapping [ 14.091219] DMAR: [DMA Read] Request device [07:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 14.180842] DMAR: DRHD: handling fault status reg 202 [ 14.180845] DMAR: [DMA Read] Request device [07:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 14.268756] DMAR: DRHD: handling fault status reg 302 [ 15.542551] hpsa :07:00.0: Using iommu dma mapping [ 15.567256] hpsa :07:00.0: DMAR: 32bit DMA uses non-identity mapping It seems to only happen right then, and then things are fine. Happens during both regular and kdump boot. With the kdump boot the faults are from the hpilo in the logs I'm looking at, so it doesn't seem to be tied to a device, or certain rmrr. The faulting address always seems to be the base address of the rmrr. The dmar tables look sane. Perhaps like this? The device was boot with an identity domain (iommu=pt). When loading the driver for this device, iommu driver finds that it's a 32-bit device and tries to convert it to DMA domain. The rmrr is still active during the switch, hence you see dma faults during that time window. Best regards, baolu It looks like it doesn't occur with Joerg's patchset. 
___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: question about iommu_need_mapping
On Thu Feb 20 20, Lu Baolu wrote: Hi Jerry, On 2020/2/20 7:55, Jerry Snitselaar wrote: Is it possible for a device to end up with dev->archdata.iommu == NULL on iommu_need_mapping in the following instance: 1. iommu_group has dma domain for default 2. device gets private identity domain in intel_iommu_add_device 3. iommu_need_mapping gets called with that device. 4. dmar_remove_one_dev_info sets dev->archdata.iommu = NULL via unlink_domain_info. 5. request_default_domain_for_dev exits after checking that group->default_domain exists, and group->default_domain->type is dma. 6. iommu_request_dma_domain_for_dev returns 0 from request_default_domain_for_dev and a private dma domain isn't created for the device. Yes. It's possible. The case I was seeing went away with commit 9235cb13d7d1 ("iommu/vt-d: Allow devices with RMRRs to use identity domain"), because it changed which domain the group and devices were using, but it seems like it is still a possibility with the code. Baolu, you mentioned possibly removing the domain switch. Commit 98b2fffb5e27 ("iommu/vt-d: Handle 32bit device with identity default domain") makes it sound like the domain switch is required. It's more "nice to have" than "required" if the iommu driver doesn't disable swiotlb explicitly. The device access of system memory higher than the device's addressing capability could go through the bounced buffer implemented in swiotlb. Best regards, baolu Hi Baolu, Would this mean switching to bounce_dma_ops instead? Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: question about iommu_need_mapping
On Thu Feb 20 20, Jerry Snitselaar wrote: On Thu Feb 20 20, Lu Baolu wrote: Hi Jerry, On 2020/2/20 7:55, Jerry Snitselaar wrote: Is it possible for a device to end up with dev->archdata.iommu == NULL on iommu_need_mapping in the following instance: 1. iommu_group has dma domain for default 2. device gets private identity domain in intel_iommu_add_device 3. iommu_need_mapping gets called with that device. 4. dmar_remove_one_dev_info sets dev->archdata.iommu = NULL via unlink_domain_info. 5. request_default_domain_for_dev exits after checking that group->default_domain exists, and group->default_domain->type is dma. 6. iommu_request_dma_domain_for_dev returns 0 from request_default_domain_for_dev and a private dma domain isn't created for the device. Yes. It's possible. The case I was seeing went away with commit 9235cb13d7d1 ("iommu/vt-d: Allow devices with RMRRs to use identity domain"), because it changed which domain the group and devices were using, but it seems like it is still a possibility with the code. Baolu, you mentioned possibly removing the domain switch. Commit 98b2fffb5e27 ("iommu/vt-d: Handle 32bit device with identity default domain") makes it sound like the domain switch is required. It's more "nice to have" than "required" if the iommu driver doesn't disable swiotlb explicitly. The device access of system memory higher than the device's addressing capability could go through the bounced buffer implemented in swiotlb. Best regards, baolu Hi Baolu, Would this mean switching to bounce_dma_ops instead? Never mind. I see that it would go into the dma_direct code. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
arm-smmu.1.auto: Unhandled context fault starting with 5.4-rc1
Hi Will, On a gigabyte system with Cavium CN8xx, when doing a fio test against an nvme drive we are seeing the following: [ 637.161194] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003f6000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.174329] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80136000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.186887] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010002ee000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.199275] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003c7000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.211885] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000392000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.224580] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80118000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.237241] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80100036, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.249657] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801ba000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.262120] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8013e000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.274468] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000304000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 I also reproduced with 5.5-rc7, and will check 5.6-rc1 later today. I couldn't narrow it down further into 5.4-rc1. I don't know smmu or the code well, any thoughts on where to start digging into this? fio test that is being run is: #fio -filename=/dev/nvme0n1 -iodepth=64 -thread -rw=randwrite -ioengine=libaio -bs=4k -runtime=43200 -size=-group_reporting -name=mytest -numjobs=32 Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: arm-smmu.1.auto: Unhandled context fault starting with 5.4-rc1
On Fri Feb 14 20, Robin Murphy wrote: Hi Jerry, On 2020-02-14 8:13 pm, Jerry Snitselaar wrote: Hi Will, On a gigabyte system with Cavium CN8xx, when doing a fio test against an nvme drive we are seeing the following: [ 637.161194] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003f6000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.174329] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80136000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.186887] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010002ee000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.199275] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8010003c7000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.211885] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000392000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.224580] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80118000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.237241] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x80100036, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.249657] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801ba000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.262120] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x8013e000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 [ 637.274468] arm-smmu arm-smmu.1.auto: Unhandled context fault: fsr=0x8402, iova=0x801000304000, fsynr=0x70091, cbfrsynra=0x9000, cb=7 Those "IOVAs" don't look much like IOVAs from the DMA allocator - if they were physical addresses, would they correspond to an expected region of the physical memory map? I would suspect that this is most likely misbehaviour in the NVMe driver (issuing a write to a non-DMA-mapped address), and the SMMU is just doing its job in blocking and reporting it. I also reproduced with 5.5-rc7, and will check 5.6-rc1 later today. 
I couldn't narrow it down further into 5.4-rc1. I don't know smmu or the code well, any thoughts on where to start digging into this? fio test that is being run is: #fio -filename=/dev/nvme0n1 -iodepth=64 -thread -rw=randwrite -ioengine=libaio -bs=4k -runtime=43200 -size=-group_reporting -name=mytest -numjobs=32 Just to clarify, do other tests work OK on the same device? Thanks, Robin. I was able to get back on the system today. I think I know what the problem is: [0.036189] iommu: Gigabyte R120-T34-00 detected, force iommu passthrough mode [6.324282] iommu: Default domain type: Translated So the new default domain code in 5.4 overrides the iommu quirk code setting default passthrough. Testing a quick patch that tracks whether the default domain was set in the quirk code, and leaves it alone if it was. So far it seems to be working. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
question about iommu_need_mapping
Is it possible for a device to end up with dev->archdata.iommu == NULL on iommu_need_mapping in the following instance: 1. iommu_group has dma domain for default 2. device gets private identity domain in intel_iommu_add_device 3. iommu_need_mapping gets called with that device. 4. dmar_remove_one_dev_info sets dev->archdata.iommu = NULL via unlink_domain_info. 5. request_default_domain_for_dev exits after checking that group->default_domain exists, and group->default_domain->type is dma. 6. iommu_request_dma_domain_for_dev returns 0 from request_default_domain_for_dev and a private dma domain isn't created for the device. The case I was seeing went away with commit 9235cb13d7d1 ("iommu/vt-d: Allow devices with RMRRs to use identity domain"), because it changed which domain the group and devices were using, but it seems like it is still a possibility with the code. Baolu, you mentioned possibly removing the domain switch. Commit 98b2fffb5e27 ("iommu/vt-d: Handle 32bit device with identity default domain") makes it sound like the domain switch is required. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
dmar fault right around domain switch in iommu_need_mapping
Hi Joerg and Baolu, I'm chasing down one last issue. I'm waiting to hear back from them testing with Joerg's patchset, but I'm guessing this will still pop up. It looks like right around when the domain switch occurs in iommu_need_mapping there are some dmar faults (below is from 5.6-rc1 plus earlier fix attempt that moved deferred attach to beginning of iommu_need_mapping): [ 12.546920] DMAR: DRHD: handling fault status reg 2 [ 12.546923] DMAR: [DMA Read] Request device [02:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 12.635193] hpsa :02:00.0: Using iommu dma mapping [ 12.776712] hpsa :02:00.0: DMAR: 32bit DMA uses non-identity mapping [ 14.091219] DMAR: [DMA Read] Request device [07:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 14.180842] DMAR: DRHD: handling fault status reg 202 [ 14.180845] DMAR: [DMA Read] Request device [07:00.0] PASID fault addr 791dd000 [fault reason 02] Present bit in context entry is clear [ 14.268756] DMAR: DRHD: handling fault status reg 302 [ 15.542551] hpsa :07:00.0: Using iommu dma mapping [ 15.567256] hpsa :07:00.0: DMAR: 32bit DMA uses non-identity mapping It seems to only happen right then, and then things are fine. Happens during both regular and kdump boot. With the kdump boot the faults are from the hpilo in the logs I'm looking at, so it doesn't seem to be tied to a device, or certain rmrr. The faulting address always seems to be the base address of the rmrr. The dmar tables look sane. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/5 v2] iommu/vt-d: Do deferred attachment in iommu_need_mapping()
On Tue Feb 18 20, Joerg Roedel wrote: Hi Baolu, On Tue, Feb 18, 2020 at 10:38:14AM +0800, Lu Baolu wrote: > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 42cdcce1602e..32f43695a22b 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -2541,9 +2541,6 @@ static void do_deferred_attach(struct device *dev) > static struct dmar_domain *deferred_attach_domain(struct device *dev) > { > - if (unlikely(attach_deferred(dev))) > - do_deferred_attach(dev); > - This should also be moved to the call place of deferred_attach_domain() in bounce_map_single(). bounce_map_single() assumes that devices always use DMA domain, so it doesn't call iommu_need_mapping(). We could do_deferred_attach() there manually. Good point, thanks for your review. Updated patch below. From 3a5b8a66d288d86ac1fd45092e7d96f842d0cccf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:20:59 +0100 Subject: [PATCH 3/5] iommu/vt-d: Do deferred attachment in iommu_need_mapping() The attachment of deferred devices needs to happen before the check whether the device is identity mapped or not. Otherwise the check will return wrong results, cause warnings boot failures in kdump kernels, like WARNING: CPU: 0 PID: 318 at ../drivers/iommu/intel-iommu.c:592 domain_get_iommu+0x61/0x70 [...] Call Trace: __intel_map_single+0x55/0x190 intel_alloc_coherent+0xac/0x110 dmam_alloc_attrs+0x50/0xa0 ahci_port_start+0xfb/0x1f0 [libahci] ata_host_start.part.39+0x104/0x1e0 [libata] With the earlier check the kdump boot succeeds and a crashdump is written. Signed-off-by: Joerg Roedel Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/vt-d: call __dmar_remove_one_dev_info with valid pointer
It is possible for archdata.iommu to be set to DEFER_DEVICE_DOMAIN_INFO or DUMMY_DEVICE_DOMAIN_INFO so check for those values before calling __dmar_remove_one_dev_info. Without a check it can result in a null pointer dereference. This has been seen while booting a kdump kernel on an HP dl380 gen9. Cc: Joerg Roedel Cc: Lu Baolu Cc: David Woodhouse Cc: sta...@vger.kernel.org # 5.3+ Cc: linux-ker...@vger.kernel.org Fixes: ae23bfb68f28 ("iommu/vt-d: Detach domain before using a private one") Signed-off-by: Jerry Snitselaar --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1801f0aaf013..932267f49f9a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5163,7 +5163,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(&device_domain_lock, flags); info = dev->archdata.iommu; - if (info) + if (info && info != DEFER_DEVICE_DOMAIN_INFO + && info != DUMMY_DEVICE_DOMAIN_INFO) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } -- 2.24.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Add a quirk flag for scope mismatched devices
On Tue Dec 24 19, Lu Baolu wrote: We expect devices with endpoint scope to have normal PCI headers, and devices with bridge scope to have bridge PCI headers. However Some PCI devices may be listed in the DMAR table with bridge scope, even though they have a normal PCI header. Add a quirk flag for those special devices. Cc: Roland Dreier Cc: Jim Yan Signed-off-by: Lu Baolu --- Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 22/22] iommu/vt-d: Add a quirk flag for scope mismatched devices
On Wed Jan 01 20, Roland Dreier via iommu wrote: We saw more devices with the same mismatch quirk. So maintaining them in a quirk table will make it more readable and maintainable. I guess I disagree about the maintainable part, given that this patch already regresses Broadwell NTB. I'm not even sure what the DMAR table says about NTB on my Skylake systems, exactly because the existing code means I did not have any problems. But we might need to add device 201Ch too. Maybe we don't need the mismatch check at all? Your patch sets the quirk if any possibly mismatching device is present in the system, so we'll ignore any scope mismatch on a system with, say, the 8086:2020 NVMe host in it. So could we just drop the check completely and not have a quirk to disable the check? - R. If the check is removed what happens for cases where there is an actual problem in the dmar table? I just worked an issue with some Intel people where a purley system had an rmrr entry pointing to a bridge as the endpoint device instead of the raid module sitting behind it. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 22/22] iommu/vt-d: Add a quirk flag for scope mismatched devices
On Tue Jan 07 20, Lu Baolu wrote: Hi Jerry, On 1/7/20 1:05 AM, Jerry Snitselaar wrote: On Wed Jan 01 20, Roland Dreier via iommu wrote: We saw more devices with the same mismatch quirk. So maintaining them in a quirk table will make it more readable and maintainable. I guess I disagree about the maintainable part, given that this patch already regresses Broadwell NTB. I'm not even sure what the DMAR table says about NTB on my Skylake systems, exactly because the existing code means I did not have any problems. But we might need to add device 201Ch too. Maybe we don't need the mismatch check at all? Your patch sets the quirk if any possibly mismatching device is present in the system, so we'll ignore any scope mismatch on a system with, say, the 8086:2020 NVMe host in it. So could we just drop the check completely and not have a quirk to disable the check? - R. If the check is removed what happens for cases where there is an actual problem in the dmar table? I just worked an issue with some Intel people where a purley system had an rmrr entry pointing to a bridge as the endpoint device instead of the raid module sitting behind it. The latest solution was here. https://lkml.org/lkml/2020/1/5/103, does this work for you? Best regards, baolu Hi Baolu, They resolved it by updating the rmrr entry in the dmar table to add the extra path needed for it to point at the raid module. Looking at the code though I imagine without the firmware update they would still have the problem because IIRC it was a combo of an endpoint scope type, and a pci bridge header so that first check would fail as it did before. My worry was if the suggestion is to remove the check completely, a case like that wouldn't report anything wrong. Jim's latest patch I think solves the issue for what he was seeing and the NTB case. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2] iommu/vt-d: Don't reject nvme host due to scope mismatch
On Sun Jan 05 20, jimyan wrote: On a system with an Intel PCIe port configured as a nvme host device, iommu initialization fails with DMAR: Device scope type does not match for :80:00.0 This is because the DMAR table reports this device as having scope 2 (ACPI_DMAR_SCOPE_TYPE_BRIDGE): but the device has a type 0 PCI header: 80:00.0 Class 0600: Device 8086:2020 (rev 06) 00: 86 80 20 20 47 05 10 00 06 00 00 06 10 00 00 00 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 00 00 30: 00 00 00 00 90 00 00 00 00 00 00 00 00 01 00 00 VT-d works perfectly on this system, so there's no reason to bail out on initialization due to this apparent scope mismatch. Add the class 0x06 ("PCI_BASE_CLASS_BRIDGE") as a heuristic for allowing DMAR initialization for non-bridge PCI devices listed with scope bridge. Signed-off-by: jimyan Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 0/3] Replace private domain with per-group default domain
On Wed May 06 20, Lu Baolu wrote: Some devices are required to use a specific type (identity or dma) of default domain when they are used with a vendor iommu. When the system level default domain type is different from it, the vendor iommu driver has to request a new default domain with either iommu_request_dma_domain_for_dev() or iommu_request_dm_for_dev() in the add_dev() callback. Unfortunately, these two helpers only work when the group hasn't been assigned to any other devices, hence, some vendor iommu driver has to use a private domain if it fails to request a new default one. Joerg proposed an on-going proposal which makes the default domain framework to support configuring per-group default domain during boot process. https://lkml.org/lkml/2020/4/14/616 [This has been applied in iommu/next.] Hence, there is no need to keep the private domain implementation in the Intel IOMMU driver. This patch series aims to remove it. Best regards, baolu Change log: v3->v4: - Make the commit message of the first patch more comprehensive. v2->v3: - Port necessary patches on the top of Joerg's new proposal. https://lkml.org/lkml/2020/4/14/616 The per-group default domain proposed previously in this series will be deprecated due to a race concern between domain switching and device driver probing. v1->v2: - Rename the iommu ops callback to def_domain_type Lu Baolu (3): iommu/vt-d: Allow 32bit devices to uses DMA domain iommu/vt-d: Allow PCI sub-hierarchy to use DMA domain iommu/vt-d: Apply per-device dma_ops drivers/iommu/intel-iommu.c | 396 +++- 1 file changed, 26 insertions(+), 370 deletions(-) -- 2.17.1 Reviewed-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
amd kdump failure with iommu=nopt
We've seen kdump failures with recent kernels (5.5, 5.6, 5.7-rc1) on amd systems when iommu is enabled in translation mode. In the cases so far there has been mpt3sas involved, but I'm also seeing io page faults for ahci right before mpt3sas has an io page fault: [ 15.156620] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xfff9b300 flags=0x0020] [ 15.166889] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xfff9b320 flags=0x0020] [ 15.177169] ata2: SATA link up 6.0 Gbps (SStatus 133 SControl 300) [ 15.186100] ata4.00: failed to IDENTIFY (device reports invalid type, err_mask=0x0) [ 15.193786] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f730c0 flags=0x0020] [ 15.204059] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f732c0 flags=0x0020] [ 15.214327] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f734c0 flags=0x0020] [ 15.224597] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f736c0 flags=0x0020] [ 15.234867] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f738c0 flags=0x0020] [ 15.245138] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ac0 flags=0x0020] [ 15.255407] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73cc0 flags=0x0020] [ 15.265677] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ec0 flags=0x0020] [ 20.599101] ata2.00: failed to IDENTIFY (INIT_DEV_PARAMS failed, err_mask=0x80) [ 20.916172] ata4: SATA link up 1.5 Gbps (SStatus 113 SControl 300) [ 20.922429] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xfff9b300 flags=0x0020] [ 20.932703] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xfff9b320 flags=0x0020] [ 20.943234] ata2: SATA link up 6.0 Gbps (SStatus 133 SControl 300) [ 20.949430] ata4.00: failed to 
IDENTIFY (device reports invalid type, err_mask=0x0) [ 20.957115] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f730c0 flags=0x0020] [ 20.967384] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f732c0 flags=0x0020] [ 20.977654] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f734c0 flags=0x0020] [ 20.987923] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f736c0 flags=0x0020] [ 20.998193] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f738c0 flags=0x0020] [ 21.008464] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ac0 flags=0x0020] [ 21.018733] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73cc0 flags=0x0020] [ 21.029005] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ec0 flags=0x0020] [ 26.231097] ata2.00: failed to IDENTIFY (INIT_DEV_PARAMS failed, err_mask=0x80) [ 26.238415] ata2: limiting SATA link speed to 3.0 Gbps [ 26.548169] ata4: SATA link up 1.5 Gbps (SStatus 113 SControl 300) [ 26.564483] ata2: SATA link up 6.0 Gbps (SStatus 133 SControl 320) [ 26.571026] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f730c0 flags=0x0020] [ 26.581301] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f732c0 flags=0x0020] [ 26.591568] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f734c0 flags=0x0020] [ 26.601839] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f736c0 flags=0x0020] [ 26.612109] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f738c0 flags=0x0020] [ 26.622377] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ac0 flags=0x0020] [ 26.632647] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73cc0 flags=0x0020] [ 
26.642917] ahci :63:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0042 address=0xf1f73ec0 flags=0x0020] [ 26.654047] ata2.00: failed to IDENTIFY (INIT_DEV_PARAMS failed, err_mask=0x80) [ 26.743097] xhci_hcd :05:00.3: Error while assigning device slot ID [ 26.749718] xhci_hcd :05:00.3: Max number of devices this xHCI host supports is 64. [ 26.757730] usb usb1-port2: couldn't allocate usb_device [ 26.987555] mpt3sas version 33.100.00.00 loaded [ 26.994668] mpt3sas_cm0: 63 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (226256 kB) [ 27.060443] mpt3sas_cm0: CurrentHostPageSize is 0: Setting default host page size to 4k [ 27.068469] mpt3sas_cm0: MSI-X vectors supported: 96 [ 27.073444] no of cores: 1, max_msix_vectors: -1 [ 27.078244]
Re: [PATCH] iommu: Implement deferred domain attachment
On Mon May 18 20, Joerg Roedel wrote: On Fri, May 15, 2020 at 08:23:13PM +0100, Robin Murphy wrote: But that's not what this is; this is (supposed to be) the exact same "don't actually perform the attach yet" logic as before, just restricting it to default domains in the one place that it actually needs to be, so as not to fundamentally bugger up iommu_attach_device() in a way that prevents it from working as expected at the correct point later. You are right, that is better. I tested it and it seems to work. Updated diff attached, with a minor cleanup included. Mind sending it as a proper patch I can send upstream? Thanks, Joerg I should have this tested this afternoon. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: Implement deferred domain attachment
On Mon May 18 20, Joerg Roedel wrote: On Fri, May 15, 2020 at 08:23:13PM +0100, Robin Murphy wrote: But that's not what this is; this is (supposed to be) the exact same "don't actually perform the attach yet" logic as before, just restricting it to default domains in the one place that it actually needs to be, so as not to fundamentally bugger up iommu_attach_device() in a way that prevents it from working as expected at the correct point later. You are right, that is better. I tested it and it seems to work. Updated diff attached, with a minor cleanup included. Mind sending it as a proper patch I can send upstream? Thanks, Joerg diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7b375421afba..a9d02bc3ab5b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -693,6 +693,15 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, return ret; } +static bool iommu_is_attach_deferred(struct iommu_domain *domain, +struct device *dev) +{ + if (domain->ops->is_attach_deferred) + return domain->ops->is_attach_deferred(domain, dev); + + return false; +} + /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) @@ -705,6 +714,7 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev) { int ret, i = 0; struct group_device *device; + struct iommu_domain *domain; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) @@ -747,7 +757,8 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev) mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain) + domain = group->domain; + if (domain && !iommu_is_attach_deferred(domain, dev)) ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); if (ret) @@ -1653,9 +1664,6 @@ static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { int ret; - if ((domain->ops->is_attach_deferred != NULL) && - 
domain->ops->is_attach_deferred(domain, dev)) - return 0; if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; @@ -1727,8 +1735,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid); static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) + if (iommu_is_attach_deferred(domain, dev)) return; if (unlikely(domain->ops->detach_dev == NULL)) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu This worked for me as well. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: amd kdump failure with iommu=nopt
On Thu May 14 20, Joerg Roedel wrote: On Thu, May 14, 2020 at 05:36:23PM +0200, Joerg Roedel wrote: This commit also removes the deferred attach of the device to its new domain. Does the attached diff fix the problem for you? +static int __iommu_attach_device_no_defer(struct iommu_domain *domain, + struct device *dev) +{ if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; ret = domain->ops->attach_dev(domain, dev); if (!ret) trace_attach_device_to_domain(dev); + return ret; } Sorry, this didn't compile, here is an updated version that actually compiles: diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4050569188be..f54ebb964271 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1889,13 +1889,19 @@ void iommu_domain_free(struct iommu_domain *domain) } EXPORT_SYMBOL_GPL(iommu_domain_free); -static int __iommu_attach_device(struct iommu_domain *domain, -struct device *dev) +static bool __iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + if (!domain->ops->is_attach_deferred) + return false; + + return domain->ops->is_attach_deferred(domain, dev); +} + +static int __iommu_attach_device_no_defer(struct iommu_domain *domain, + struct device *dev) { int ret; - if ((domain->ops->is_attach_deferred != NULL) && - domain->ops->is_attach_deferred(domain, dev)) - return 0; if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; @@ -1903,9 +1909,19 @@ static int __iommu_attach_device(struct iommu_domain *domain, ret = domain->ops->attach_dev(domain, dev); if (!ret) trace_attach_device_to_domain(dev); + return ret; } +static int __iommu_attach_device(struct iommu_domain *domain, +struct device *dev) +{ + if (__iommu_is_attach_deferred(domain, dev)) + return 0; + + return __iommu_attach_device_no_defer(domain, dev); +} + int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2023,7 +2039,12 @@ EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); */ struct 
iommu_domain *iommu_get_dma_domain(struct device *dev) { - return dev->iommu_group->default_domain; + struct iommu_domain *domain = dev->iommu_group->default_domain; + + if (__iommu_is_attach_deferred(domain, dev)) + __iommu_attach_device_no_defer(domain, dev); + + return domain; } /* ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Yes, that works. Tested-by: Jerry Snitselaar ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
kdump boot failing with IVRS checksum failure
Hello Joerg, We are seeing a kdump kernel boot failure in test on an HP DL325 Gen10 and it was tracked down to 387caf0b759a ("iommu/amd: Treat per-device exclusion ranges as r/w unity-mapped regions"). Reproduced on 5.9-rc5 and goes away with revert of the commit. There is a follow on commit that depends on this that was reverted as well 2ca6b6dc8512 ("iommu/amd: Remove unused variable"). I'm working on getting system access and want to see what the IVRS table looks like, but thought I'd give you heads up. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Tue Apr 14 20, Joerg Roedel wrote: Hi, here is the second version of this patch-set. The first version with some more introductory text can be found here: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/ Changes v1->v2: * Rebased to v5.7-rc1 * Re-wrote the arm-smmu changes as suggested by Robin Murphy * Re-worked the Exynos patches to hopefully not break the driver anymore * Fixed a missing mutex_unlock() reported by Marek Szyprowski, thanks for that. There is also a git-branch available with these patches applied: https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v2 Please review. Thanks, Joerg Joerg Roedel (32): iommu: Move default domain allocation to separate function iommu/amd: Implement iommu_ops->def_domain_type call-back iommu/vt-d: Wire up iommu_ops->def_domain_type iommu/amd: Remove dma_mask check from check_device() iommu/amd: Return -ENODEV in add_device when device is not handled by IOMMU iommu: Add probe_device() and remove_device() call-backs iommu: Move default domain allocation to iommu_probe_device() iommu: Keep a list of allocated groups in __iommu_probe_device() iommu: Move new probe_device path to separate function iommu: Split off default domain allocation from group assignment iommu: Move iommu_group_create_direct_mappings() out of iommu_group_add_device() iommu: Export bus_iommu_probe() and make is safe for re-probing iommu/amd: Remove dev_data->passthrough iommu/amd: Convert to probe/release_device() call-backs iommu/vt-d: Convert to probe/release_device() call-backs iommu/arm-smmu: Convert to probe/release_device() call-backs iommu/pamu: Convert to probe/release_device() call-backs iommu/s390: Convert to probe/release_device() call-backs iommu/virtio: Convert to probe/release_device() call-backs iommu/msm: Convert to probe/release_device() call-backs iommu/mediatek: Convert to probe/release_device() call-backs iommu/mediatek-v1 Convert to probe/release_device() call-backs 
iommu/qcom: Convert to probe/release_device() call-backs iommu/rockchip: Convert to probe/release_device() call-backs iommu/tegra: Convert to probe/release_device() call-backs iommu/renesas: Convert to probe/release_device() call-backs iommu/omap: Remove orphan_dev tracking iommu/omap: Convert to probe/release_device() call-backs iommu/exynos: Use first SYSMMU in controllers list for IOMMU core iommu/exynos: Convert to probe/release_device() call-backs iommu: Remove add_device()/remove_device() code-paths iommu: Unexport iommu_group_get_for_dev() Sai Praneeth Prakhya (1): iommu: Add def_domain_type() callback in iommu_ops drivers/iommu/amd_iommu.c | 97 drivers/iommu/amd_iommu_types.h | 1 - drivers/iommu/arm-smmu-v3.c | 38 +-- drivers/iommu/arm-smmu.c| 39 ++-- drivers/iommu/exynos-iommu.c| 24 +- drivers/iommu/fsl_pamu_domain.c | 22 +- drivers/iommu/intel-iommu.c | 68 +- drivers/iommu/iommu.c | 393 +--- drivers/iommu/ipmmu-vmsa.c | 60 ++--- drivers/iommu/msm_iommu.c | 34 +-- drivers/iommu/mtk_iommu.c | 24 +- drivers/iommu/mtk_iommu_v1.c| 50 ++-- drivers/iommu/omap-iommu.c | 99 ++-- drivers/iommu/qcom_iommu.c | 24 +- drivers/iommu/rockchip-iommu.c | 26 +-- drivers/iommu/s390-iommu.c | 22 +- drivers/iommu/tegra-gart.c | 24 +- drivers/iommu/tegra-smmu.c | 31 +-- drivers/iommu/virtio-iommu.c| 41 +--- include/linux/iommu.h | 21 +- 20 files changed, 533 insertions(+), 605 deletions(-) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Hi Joerg, With this patchset, I have an epyc system where if I boot with iommu=nopt and force a dump I will see some io page faults for a nic on the system. The vmcore is harvested and the system reboots. I haven't reproduced it on other systems yet, but without the patchset I don't see the io page faults during the kdump. Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Fri May 29 20, Jerry Snitselaar wrote: On Tue Apr 14 20, Joerg Roedel wrote: Hi, here is the second version of this patch-set. The first version with some more introductory text can be found here: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/ Changes v1->v2: * Rebased to v5.7-rc1 * Re-wrote the arm-smmu changes as suggested by Robin Murphy * Re-worked the Exynos patches to hopefully not break the driver anymore * Fixed a missing mutex_unlock() reported by Marek Szyprowski, thanks for that. There is also a git-branch available with these patches applied: https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v2 Please review. Thanks, Joerg Joerg Roedel (32): iommu: Move default domain allocation to separate function iommu/amd: Implement iommu_ops->def_domain_type call-back iommu/vt-d: Wire up iommu_ops->def_domain_type iommu/amd: Remove dma_mask check from check_device() iommu/amd: Return -ENODEV in add_device when device is not handled by IOMMU iommu: Add probe_device() and remove_device() call-backs iommu: Move default domain allocation to iommu_probe_device() iommu: Keep a list of allocated groups in __iommu_probe_device() iommu: Move new probe_device path to separate function iommu: Split off default domain allocation from group assignment iommu: Move iommu_group_create_direct_mappings() out of iommu_group_add_device() iommu: Export bus_iommu_probe() and make is safe for re-probing iommu/amd: Remove dev_data->passthrough iommu/amd: Convert to probe/release_device() call-backs iommu/vt-d: Convert to probe/release_device() call-backs iommu/arm-smmu: Convert to probe/release_device() call-backs iommu/pamu: Convert to probe/release_device() call-backs iommu/s390: Convert to probe/release_device() call-backs iommu/virtio: Convert to probe/release_device() call-backs iommu/msm: Convert to probe/release_device() call-backs iommu/mediatek: Convert to probe/release_device() call-backs iommu/mediatek-v1 Convert to 
probe/release_device() call-backs iommu/qcom: Convert to probe/release_device() call-backs iommu/rockchip: Convert to probe/release_device() call-backs iommu/tegra: Convert to probe/release_device() call-backs iommu/renesas: Convert to probe/release_device() call-backs iommu/omap: Remove orphan_dev tracking iommu/omap: Convert to probe/release_device() call-backs iommu/exynos: Use first SYSMMU in controllers list for IOMMU core iommu/exynos: Convert to probe/release_device() call-backs iommu: Remove add_device()/remove_device() code-paths iommu: Unexport iommu_group_get_for_dev() Sai Praneeth Prakhya (1): iommu: Add def_domain_type() callback in iommu_ops drivers/iommu/amd_iommu.c | 97 drivers/iommu/amd_iommu_types.h | 1 - drivers/iommu/arm-smmu-v3.c | 38 +-- drivers/iommu/arm-smmu.c| 39 ++-- drivers/iommu/exynos-iommu.c| 24 +- drivers/iommu/fsl_pamu_domain.c | 22 +- drivers/iommu/intel-iommu.c | 68 +- drivers/iommu/iommu.c | 393 +--- drivers/iommu/ipmmu-vmsa.c | 60 ++--- drivers/iommu/msm_iommu.c | 34 +-- drivers/iommu/mtk_iommu.c | 24 +- drivers/iommu/mtk_iommu_v1.c| 50 ++-- drivers/iommu/omap-iommu.c | 99 ++-- drivers/iommu/qcom_iommu.c | 24 +- drivers/iommu/rockchip-iommu.c | 26 +-- drivers/iommu/s390-iommu.c | 22 +- drivers/iommu/tegra-gart.c | 24 +- drivers/iommu/tegra-smmu.c | 31 +-- drivers/iommu/virtio-iommu.c| 41 +--- include/linux/iommu.h | 21 +- 20 files changed, 533 insertions(+), 605 deletions(-) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Hi Joerg, With this patchset, I have an epyc system where if I boot with iommu=nopt and force a dump I will see some io page faults for a nic on the system. The vmcore is harvested and the system reboots. I haven't reproduced it on other systems yet, but without the patchset I don't see the io page faults during the kdump. 
Regards, Jerry I just hit an issue on a separate intel based system (kdump iommu=nopt), where it panics in during intel_iommu_attach_device, in is_aux_domain, due to device_domain_info being DEFER_DEVICE_DOMAIN_INFO. That doesn't get set to a valid address until the domain_add_dev_info call. Is it as simple as the following? diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 29d3940847d3..f1bbeed46a4c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5053,8 +5053,8 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) { struct device_domain_info *info = dev->archdata.iommu; - return info && info->auxd_enabled && - domain->type == IOMMU_DOMAIN_U
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Mon Jun 01 20, Jerry Snitselaar wrote: On Fri May 29 20, Jerry Snitselaar wrote: On Tue Apr 14 20, Joerg Roedel wrote: Hi, here is the second version of this patch-set. The first version with some more introductory text can be found here: https://lore.kernel.org/lkml/20200407183742.4344-1-j...@8bytes.org/ Changes v1->v2: * Rebased to v5.7-rc1 * Re-wrote the arm-smmu changes as suggested by Robin Murphy * Re-worked the Exynos patches to hopefully not break the driver anymore * Fixed a missing mutex_unlock() reported by Marek Szyprowski, thanks for that. There is also a git-branch available with these patches applied: https://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git/log/?h=iommu-probe-device-v2 Please review. Thanks, Joerg Joerg Roedel (32): iommu: Move default domain allocation to separate function iommu/amd: Implement iommu_ops->def_domain_type call-back iommu/vt-d: Wire up iommu_ops->def_domain_type iommu/amd: Remove dma_mask check from check_device() iommu/amd: Return -ENODEV in add_device when device is not handled by IOMMU iommu: Add probe_device() and remove_device() call-backs iommu: Move default domain allocation to iommu_probe_device() iommu: Keep a list of allocated groups in __iommu_probe_device() iommu: Move new probe_device path to separate function iommu: Split off default domain allocation from group assignment iommu: Move iommu_group_create_direct_mappings() out of iommu_group_add_device() iommu: Export bus_iommu_probe() and make is safe for re-probing iommu/amd: Remove dev_data->passthrough iommu/amd: Convert to probe/release_device() call-backs iommu/vt-d: Convert to probe/release_device() call-backs iommu/arm-smmu: Convert to probe/release_device() call-backs iommu/pamu: Convert to probe/release_device() call-backs iommu/s390: Convert to probe/release_device() call-backs iommu/virtio: Convert to probe/release_device() call-backs iommu/msm: Convert to probe/release_device() call-backs iommu/mediatek: Convert to 
probe/release_device() call-backs iommu/mediatek-v1 Convert to probe/release_device() call-backs iommu/qcom: Convert to probe/release_device() call-backs iommu/rockchip: Convert to probe/release_device() call-backs iommu/tegra: Convert to probe/release_device() call-backs iommu/renesas: Convert to probe/release_device() call-backs iommu/omap: Remove orphan_dev tracking iommu/omap: Convert to probe/release_device() call-backs iommu/exynos: Use first SYSMMU in controllers list for IOMMU core iommu/exynos: Convert to probe/release_device() call-backs iommu: Remove add_device()/remove_device() code-paths iommu: Unexport iommu_group_get_for_dev() Sai Praneeth Prakhya (1): iommu: Add def_domain_type() callback in iommu_ops drivers/iommu/amd_iommu.c | 97 drivers/iommu/amd_iommu_types.h | 1 - drivers/iommu/arm-smmu-v3.c | 38 +-- drivers/iommu/arm-smmu.c| 39 ++-- drivers/iommu/exynos-iommu.c| 24 +- drivers/iommu/fsl_pamu_domain.c | 22 +- drivers/iommu/intel-iommu.c | 68 +- drivers/iommu/iommu.c | 393 +--- drivers/iommu/ipmmu-vmsa.c | 60 ++--- drivers/iommu/msm_iommu.c | 34 +-- drivers/iommu/mtk_iommu.c | 24 +- drivers/iommu/mtk_iommu_v1.c| 50 ++-- drivers/iommu/omap-iommu.c | 99 ++-- drivers/iommu/qcom_iommu.c | 24 +- drivers/iommu/rockchip-iommu.c | 26 +-- drivers/iommu/s390-iommu.c | 22 +- drivers/iommu/tegra-gart.c | 24 +- drivers/iommu/tegra-smmu.c | 31 +-- drivers/iommu/virtio-iommu.c| 41 +--- include/linux/iommu.h | 21 +- 20 files changed, 533 insertions(+), 605 deletions(-) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu Hi Joerg, With this patchset, I have an epyc system where if I boot with iommu=nopt and force a dump I will see some io page faults for a nic on the system. The vmcore is harvested and the system reboots. I haven't reproduced it on other systems yet, but without the patchset I don't see the io page faults during the kdump. 
Regards, Jerry I just hit an issue on a separate intel based system (kdump iommu=nopt), where it panics in during intel_iommu_attach_device, in is_aux_domain, due to device_domain_info being DEFER_DEVICE_DOMAIN_INFO. That doesn't get set to a valid address until the domain_add_dev_info call. Is it as simple as the following? diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 29d3940847d3..f1bbeed46a4c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5053,8 +5053,8 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) { struct device_domain_info *info = dev->archdata.iommu; - return info && info->auxd_enabled && -
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Tue Jun 02 20, Lu Baolu wrote: Hi Jerry, On 6/1/20 6:42 PM, Jerry Snitselaar wrote: Hi Joerg, With this patchset, I have an epyc system where if I boot with iommu=nopt and force a dump I will see some io page faults for a nic on the system. The vmcore is harvested and the system reboots. I haven't reproduced it on other systems yet, but without the patchset I don't see the io page faults during the kdump. Regards, Jerry I just hit an issue on a separate intel based system (kdump iommu=nopt), where it panics in during intel_iommu_attach_device, in is_aux_domain, due to device_domain_info being DEFER_DEVICE_DOMAIN_INFO. That doesn't get set to a valid address until the domain_add_dev_info call. Is it as simple as the following? I guess you won't hit this issue if you use iommu/next branch of Joerg's tree. We've changed to use a generic helper to retrieve the valid per device iommu data or NULL (if there's no). Best regards, baolu Yeah, that will solve the panic. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 29d3940847d3..f1bbeed46a4c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5053,8 +5053,8 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) { struct device_domain_info *info = dev->archdata.iommu; - return info && info->auxd_enabled && - domain->type == IOMMU_DOMAIN_UNMANAGED; + return info && info != DEFER_DEVICE_DOMAIN_INFO && + info->auxd_enabled && domain->type == IOMMU_DOMAIN_UNMANAGED; } static void auxiliary_link_device(struct dmar_domain *domain, Regards, Jerry ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 00/33] iommu: Move iommu_group setup to IOMMU core code
On Tue Jun 02 20, Joerg Roedel wrote: Hi Jerry, On Mon, Jun 01, 2020 at 05:02:36PM -0700, Jerry Snitselaar wrote: Yeah, that will solve the panic. If you still see the kdump faults, can you please try with the attached diff? I was not able to reproduce them in my setup. Regards, Joerg I have another hp proliant server now, and reproduced. I will have the patch below tested shortly. Minor change, I switched group->domain to domain since group isn't an argument, and *data being passed in comes from group->domain anyways. diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b5ea203f6c68..5a6d509f72b6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1680,8 +1680,12 @@ static void probe_alloc_default_domain(struct bus_type *bus, static int iommu_group_do_dma_attach(struct device *dev, void *data) { struct iommu_domain *domain = data; + int ret = 0; - return __iommu_attach_device(domain, dev); + if (!iommu_is_attach_deferred(domain, dev)) + ret = __iommu_attach_device(domain, dev); + + return ret; } static int __iommu_group_dma_attach(struct iommu_group *group) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/2] iommu/vt-d: Move Kconfig and Makefile bits down into intel directory
Move Intel Kconfig and Makefile bits down into intel directory with the rest of the Intel specific files. Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Jerry Snitselaar --- drivers/iommu/Kconfig| 86 +--- drivers/iommu/Makefile | 8 +--- drivers/iommu/intel/Kconfig | 86 drivers/iommu/intel/Makefile | 7 +++ 4 files changed, 96 insertions(+), 91 deletions(-) create mode 100644 drivers/iommu/intel/Kconfig create mode 100644 drivers/iommu/intel/Makefile diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6dc49ed8377a..281cd6bd0fe0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -176,91 +176,7 @@ config AMD_IOMMU_DEBUGFS This option is -NOT- intended for production environments, and should not generally be enabled. -# Intel IOMMU support -config DMAR_TABLE - bool - -config INTEL_IOMMU - bool "Support for Intel IOMMU using DMA Remapping Devices" - depends on PCI_MSI && ACPI && (X86 || IA64) - select IOMMU_API - select IOMMU_IOVA - select NEED_DMA_MAP_STATE - select DMAR_TABLE - select SWIOTLB - select IOASID - help - DMA remapping (DMAR) devices support enables independent address - translations for Direct Memory Access (DMA) from devices. - These DMA remapping devices are reported via ACPI tables - and include PCI device scope covered by these DMA - remapping devices. - -config INTEL_IOMMU_DEBUGFS - bool "Export Intel IOMMU internals in Debugfs" - depends on INTEL_IOMMU && IOMMU_DEBUGFS - help - !!!WARNING!!! - - DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!! - - Expose Intel IOMMU internals in Debugfs. - - This option is -NOT- intended for production environments, and should - only be enabled for debugging Intel IOMMU. 
- -config INTEL_IOMMU_SVM - bool "Support for Shared Virtual Memory with Intel IOMMU" - depends on INTEL_IOMMU && X86_64 - select PCI_PASID - select PCI_PRI - select MMU_NOTIFIER - select IOASID - help - Shared Virtual Memory (SVM) provides a facility for devices - to access DMA resources through process address space by - means of a Process Address Space ID (PASID). - -config INTEL_IOMMU_DEFAULT_ON - def_bool y - prompt "Enable Intel DMA Remapping Devices by default" - depends on INTEL_IOMMU - help - Selecting this option will enable a DMAR device at boot time if - one is found. If this option is not selected, DMAR support can - be enabled by passing intel_iommu=on to the kernel. - -config INTEL_IOMMU_BROKEN_GFX_WA - bool "Workaround broken graphics drivers (going away soon)" - depends on INTEL_IOMMU && BROKEN && X86 - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA, at least until this - option is removed in the 2.6.32 kernel. - -config INTEL_IOMMU_FLOPPY_WA - def_bool y - depends on INTEL_IOMMU && X86 - help - Floppy disk drivers are known to bypass DMA API calls - thereby failing to work when IOMMU is enabled. This - workaround will setup a 1:1 mapping for the first - 16MiB to make floppy (an ISA device) work. - -config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON - bool "Enable Intel IOMMU scalable mode by default" - depends on INTEL_IOMMU - help - Selecting this option will enable by default the scalable mode if - hardware presents the capability. The scalable mode is defined in - VT-d 3.0. The scalable mode capability could be checked by reading - /sys/devices/virtual/iommu/dmar*/intel-iommu/ecap. If this option - is not selected, scalable mode support could also be enabled by - passing intel_iommu=sm_on to the kernel. 
If not sure, please use - the default value. +source "drivers/iommu/intel/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 342190196dfb..71dd2f382e78 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-y += intel/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o @@ -17,13 +18,8 @@ obj-$(CONFIG_AMD_IOMMU_V2) += amd/iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o arm_smmu-objs
[PATCH v2 2/2] iommu/amd: Move Kconfig and Makefile bits down into amd directory
Move AMD Kconfig and Makefile bits down into the amd directory with the rest of the AMD specific files. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar --- drivers/iommu/Kconfig | 45 +- drivers/iommu/Makefile | 5 + drivers/iommu/amd/Kconfig | 44 + drivers/iommu/amd/Makefile | 4 4 files changed, 50 insertions(+), 48 deletions(-) create mode 100644 drivers/iommu/amd/Kconfig create mode 100644 drivers/iommu/amd/Makefile diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 281cd6bd0fe0..24000e7ed0fa 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -132,50 +132,7 @@ config IOMMU_PGTABLES_L2 def_bool y depends on MSM_IOMMU && MMU && SMP && CPU_DCACHE_DISABLE=n -# AMD IOMMU support -config AMD_IOMMU - bool "AMD IOMMU support" - select SWIOTLB - select PCI_MSI - select PCI_ATS - select PCI_PRI - select PCI_PASID - select IOMMU_API - select IOMMU_IOVA - select IOMMU_DMA - depends on X86_64 && PCI && ACPI - help - With this option you can enable support for AMD IOMMU hardware in - your system. An IOMMU is a hardware component which provides - remapping of DMA memory accesses from devices. With an AMD IOMMU you - can isolate the DMA memory of different devices and protect the - system from misbehaving device drivers or hardware. - - You can find out if your system has an AMD IOMMU if you look into - your BIOS for an option to enable it or if you have an IVRS ACPI - table. - -config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU - select MMU_NOTIFIER - help - This option enables support for the AMD IOMMUv2 features of the IOMMU - hardware. Select this option if you want to use devices that support - the PCI PRI and PASID interface. - -config AMD_IOMMU_DEBUGFS - bool "Enable AMD IOMMU internals in DebugFS" - depends on AMD_IOMMU && IOMMU_DEBUGFS - help - !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! !!!WARNING!!! - - DO NOT ENABLE THIS OPTION UNLESS YOU REALLY, -REALLY- KNOW WHAT YOU ARE DOING!!! 
- Exposes AMD IOMMU device internals in DebugFS. - - This option is -NOT- intended for production environments, and should - not generally be enabled. - +source "drivers/iommu/amd/Kconfig" source "drivers/iommu/intel/Kconfig" config IRQ_REMAP diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 71dd2f382e78..f356bc12b1c7 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += intel/ +obj-y += amd/ intel/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o @@ -12,9 +12,6 @@ obj-$(CONFIG_IOASID) += ioasid.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o -obj-$(CONFIG_AMD_IOMMU) += amd/iommu.o amd/init.o amd/quirks.o -obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd/debugfs.o -obj-$(CONFIG_AMD_IOMMU_V2) += amd/iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig new file mode 100644 index ..1f061d91e0b8 --- /dev/null +++ b/drivers/iommu/amd/Kconfig @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0-only +# AMD IOMMU support +config AMD_IOMMU + bool "AMD IOMMU support" + select SWIOTLB + select PCI_MSI + select PCI_ATS + select PCI_PRI + select PCI_PASID + select IOMMU_API + select IOMMU_IOVA + select IOMMU_DMA + depends on X86_64 && PCI && ACPI + help + With this option you can enable support for AMD IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an AMD IOMMU you + can isolate the DMA memory of different devices and protect the + system from misbehaving device drivers or hardware. 
+ + You can find out if your system has an AMD IOMMU if you look into + your BIOS for an option to enable it or if you have an IVRS ACPI + table. + +config AMD_IOMMU_V2 + tristate "AMD IOMMU Version 2 driver" + depends on AMD_IOMMU + select MMU_NOTIFIER + help + This option enables support for the AMD IOMMUv2 features of the IOMMU + hardware. Select this option if you want to use devices that support + the PCI PRI and PASID int