[PATCH] iommu/dma: limit the IOVA allocated to dma-ranges region
Limit the IOVA allocated to dma-ranges specified for the device. This is necessary to ensure that IOVA allocated is addressable by device. Signed-off-by: Krishna Reddy --- drivers/iommu/dma-iommu.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9d1cebe7f6cb..e8a8320b571b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -364,6 +364,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova = 0; + dma_addr_t dma_end_addr; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -381,6 +382,10 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) iova_len = roundup_pow_of_two(iova_len); + /* Limit IOVA allocated to device addressable dma-ranges region. */ + dma_end_addr = (dma_addr_t)iovad->dma_32bit_pfn << shift; + dma_limit = dma_limit > dma_end_addr ? dma_end_addr : dma_limit; + if (domain->geometry.force_aperture) dma_limit = min(dma_limit, domain->geometry.aperture_end); -- 2.1.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 07/13] iommu/amd: update to new mmu_notifier semantic
From: Jérôme Glisse Call to mmu_notifier_invalidate_page() are replaced by call to mmu_notifier_invalidate_range() and thus call are bracketed by call to mmu_notifier_invalidate_range_start()/end() Remove now useless invalidate_page callback. Signed-off-by: Jérôme Glisse Cc: Suravee Suthikulpanit Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: Linus Torvalds Cc: Andrea Arcangeli --- drivers/iommu/amd_iommu_v2.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 6629c472eafd..dccf5b76eff2 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -391,13 +391,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn, return 0; } -static void mn_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - __mn_flush_page(mn, address); -} - static void mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -436,7 +429,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops iommu_mn = { .release= mn_release, .clear_flush_young = mn_clear_flush_young, - .invalidate_page= mn_invalidate_page, .invalidate_range = mn_invalidate_range, }; -- 2.13.5 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 08/13] iommu/intel: update to new mmu_notifier semantic
From: Jérôme Glisse Call to mmu_notifier_invalidate_page() are replaced by call to mmu_notifier_invalidate_range() and thus call are bracketed by call to mmu_notifier_invalidate_range_start()/end() Remove now useless invalidate_page callback. Signed-off-by: Jérôme Glisse Cc: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: Linus Torvalds Cc: Andrea Arcangeli --- drivers/iommu/intel-svm.c | 9 - 1 file changed, 9 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f167c0d84ebf..f620dccec8ee 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -223,14 +223,6 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, intel_flush_svm_range(svm, address, 1, 1, 0); } -static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) -{ - struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - - intel_flush_svm_range(svm, address, 1, 1, 0); -} - /* Pages have been freed at this point */ static void intel_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, @@ -285,7 +277,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops intel_mmuops = { .release = intel_mm_release, .change_pte = intel_change_pte, - .invalidate_page = intel_invalidate_page, .invalidate_range = intel_invalidate_range, }; -- 2.13.5 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 00/13] mmu_notifier kill invalidate_page callback v2
From: Jérôme Glisse (Sorry for so many list cross-posting and big cc) Changes since v1: - remove more dead code in kvm (no testing impact) - more accurate end address computation (patch 2) in page_mkclean_one and try_to_unmap_one - added tested-by/reviewed-by gotten so far Tested as both host and guest kernel with KVM nothing is burning yet. Previous cover letter: Please help testing ! The invalidate_page callback suffered from 2 pitfalls. First it used to happen after page table lock was released and thus a new page might have been setup for the virtual address before the call to invalidate_page(). This is in a weird way fixed by c7ab0d2fdc840266b39db94538f74207ec2afbf6 which moved the callback under the page table lock. Which also broke several existing user of the mmu_notifier API that assumed they could sleep inside this callback. The second pitfall was invalidate_page being the only callback not taking a range of address in respect to invalidation but was giving an address and a page. Lot of the callback implementer assumed this could never be THP and thus failed to invalidate the appropriate range for THP pages. By killing this callback we unify the mmu_notifier callback API to always take a virtual address range as input. There are now 2 clear APIs (I am not mentioning the youngness API, which is seldom used): - invalidate_range_start()/end() callback (which allow you to sleep) - invalidate_range() where you can not sleep but happen right after page table update under page table lock Note that a lot of existing user feels broken in respect to range_start/ range_end. Many user only have range_start() callback but there is nothing preventing them to undo what was invalidated in their range_start() callback after it returns but before any CPU page table update take place. The code pattern use in kvm or umem odp is an example on how to properly avoid such race. 
In a nutshell use some kind of sequence number and active range invalidation counter to block anything that might undo what the range_start() callback did. If you do not care about keeping fully in sync with CPU page table (ie you can live with CPU page table pointing to new different page for a given virtual address) then you can take a reference on the pages inside the range_start callback and drop it in range_end or when your driver is done with those pages. Last alternative is to use invalidate_range() if you can do invalidation without sleeping as invalidate_range() callback happens under the CPU page table spinlock right after the page table is updated. Note this is barely tested. I intend to do more testing over the next few days but i do not have access to all hardware that make use of the mmu_notifier API. First 2 patches convert existing call of mmu_notifier_invalidate_page() to mmu_notifier_invalidate_range() and bracket those call with call to mmu_notifier_invalidate_range_start()/end(). The next 10 patches remove existing invalidate_page() callback as it can no longer happen. Finally the last patch removes it completely so it can RIP. Jérôme Glisse (13): dax: update to new mmu_notifier semantic mm/rmap: update to new mmu_notifier semantic powerpc/powernv: update to new mmu_notifier semantic drm/amdgpu: update to new mmu_notifier semantic IB/umem: update to new mmu_notifier semantic IB/hfi1: update to new mmu_notifier semantic iommu/amd: update to new mmu_notifier semantic iommu/intel: update to new mmu_notifier semantic misc/mic/scif: update to new mmu_notifier semantic sgi-gru: update to new mmu_notifier semantic xen/gntdev: update to new mmu_notifier semantic KVM: update to new mmu_notifier semantic mm/mmu_notifier: kill invalidate_page Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Andrew Morton Cc: Andrea Arcangeli Cc: Joerg Roedel Cc: Dan Williams Cc: Sudeep Dutt Cc: Ashutosh Dixit Cc: Dimitri Sivanich Cc: Jack Steiner Cc: Paolo Bonzini Cc: Radim Krčmář Cc: linuxppc-...@lists.ozlabs.org Cc: dri-de...@lists.freedesktop.org Cc: amd-...@lists.freedesktop.org Cc: linux-r...@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: xen-de...@lists.xenproject.org Cc: k...@vger.kernel.org Jérôme Glisse (13): dax: update to new mmu_notifier semantic mm/rmap: update to new mmu_notifier semantic v2 powerpc/powernv: update to new mmu_notifier semantic drm/amdgpu: update to new mmu_notifier semantic IB/umem: update to new mmu_notifier semantic IB/hfi1: update to new mmu_notifier semantic iommu/amd: update to new mmu_notifier semantic iommu/intel: update to new mmu_notifier semantic misc/mic/scif: update to new mmu_notifier semantic sgi-gru: update to new mmu_notifier semantic xen/gntdev: update to new mmu_notifier semantic KVM: update to new mmu_notifier semantic v2 mm/mmu_notifier: kill invalidate_page arch/arm/include/asm/kvm_host.h | 6 - arch/arm64/include/asm/kvm_host.h| 6 - arch/mips/include/asm/kvm_host.h
Re: [PATCH 02/13] mm/rmap: update to new mmu_notifier semantic
Andrea Arcangeli wrote: > On Wed, Aug 30, 2017 at 08:47:19PM -0400, Jerome Glisse wrote: >> On Wed, Aug 30, 2017 at 04:25:54PM -0700, Nadav Amit wrote: >>> For both CoW and KSM, the correctness is maintained by calling >>> ptep_clear_flush_notify(). If you defer the secondary MMU invalidation >>> (i.e., replacing ptep_clear_flush_notify() with ptep_clear_flush() ), you >>> will cause memory corruption, and page-lock would not be enough. >> >> Just to add up, the IOMMU have its own CPU page table walker and it can >> walk the page table at any time (not the page table current to current >> CPU, IOMMU have an array that match a PASID with a page table and device >> request translation for a given virtual address against a PASID). >> >> So this means the following can happen with ptep_clear_flush() only: >> >> CPU | IOMMU >> | - walk page table populate tlb at addr A >> - clear pte at addr A| >> - set new pte| > mmu_notifier_invalidate_range_end | -flush IOMMU/device tlb > >> Device is using old page and CPU new page :( > > That condition won't be persistent. > >> But with ptep_clear_flush_notify() >> >> CPU | IOMMU >> | - walk page table populate tlb at addr A >> - clear pte at addr A| >> - notify -> invalidate_range | > flush IOMMU/device tlb >> - set new pte| >> >> So now either the IOMMU see the empty pte and trigger a page fault (this is >> if there is a racing IOMMU ATS right after the IOMMU/device tlb flush but >> before setting the new pte) or it see the new pte. Either way both IOMMU >> and CPU have a coherent view of what a virtual address points to. > > Sure, the _notify version is obviously safe. > >> Andrea explained to me the historical reasons set_pte_at_notify call the >> change_pte callback and it was intended so that KVM could update the >> secondary page table directly without having to fault. 
It is now a pointless >> optimization as the call to range_start() happening in all the places before >> any set_pte_at_notify() invalidate the secondary page table and thus will >> lead to page fault for the vm. I have talk with Andrea on way to bring back >> this optimization. > > Yes, we known for a long time, the optimization got basically disabled > when range_start/end expanded. It'd be nice to optimize change_pte > again but this is for later. > >> Yes we need the following sequence for IOMMU: >> - clear pte >> - invalidate IOMMU/device TLB >> - set new pte >> >> Otherwise the IOMMU page table walker can populate IOMMU/device tlb with >> stall entry. >> >> Note that this is not necessary for all the case. For try_to_unmap it >> is fine for instance to move the IOMMU tlb shoot down after changing the >> CPU page table as we are not pointing the pte to a different page. Either >> we clear the pte or we set a swap entry and as long as the page that use >> to be pointed by the pte is not free before the IOMMU tlb flush then we >> are fine. >> >> In fact i think the only case where we need the above sequence (clear, >> flush secondary tlb, set new pte) is for COW. I think all other cases >> we can get rid of invalidate_range() from inside the page table lock >> and rely on invalidate_range_end() to call unconditionaly. > > Even with CoW, it's not big issue if the IOMMU keeps reading from the > old page for a little while longer (in between PT lock release and > mmu_notifier_invalidate_range_end). > > How can you tell you read the old data a bit longer, because it > noticed the new page only when mmu_notifier_invalidate_range_end run, > and not because the CPU was faster at writing than the IOMMU was fast > at loading the new pagetable? > > I figure it would be detectable only that the CPU could see pageA > changing before pageB. The iommu-v2 could see pageB changing before > pageA. 
If that's a concern that is the only good reason I can think of > right now, for requiring ->invalidate_page inside the CoW PT lock to > enforce the same ordering. However if the modifications happens > simultaneously and it's a correct runtime, the order must not matter, > but still it would be detectable which may not be desirable. I don’t know what is the memory model that SVM provides, but what you describe here potentially violates it. I don’t think user software should be expected to deal with it. > > Currently the _notify is absolutely needed to run ->invalidate_range > before put_page is run in the CoW code below, because of the put_page > that is done in a not scalable place, it would be better moved down > regardless of ->invalidate_range to reduce the size of the PT lock > protected critical section. > > - if (new_page) > - put_page(new_page); > > pte_unmap_unlock(vmf->pte, vmf->ptl); > mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); > + if (new_page) > + put_page
Re: [PATCH 02/13] mm/rmap: update to new mmu_notifier semantic
On Wed, Aug 30, 2017 at 08:47:19PM -0400, Jerome Glisse wrote: > On Wed, Aug 30, 2017 at 04:25:54PM -0700, Nadav Amit wrote: > > For both CoW and KSM, the correctness is maintained by calling > > ptep_clear_flush_notify(). If you defer the secondary MMU invalidation > > (i.e., replacing ptep_clear_flush_notify() with ptep_clear_flush() ), you > > will cause memory corruption, and page-lock would not be enough. > > Just to add up, the IOMMU have its own CPU page table walker and it can > walk the page table at any time (not the page table current to current > CPU, IOMMU have an array that match a PASID with a page table and device > request translation for a given virtual address against a PASID). > > So this means the following can happen with ptep_clear_flush() only: > > CPU | IOMMU >| - walk page table populate tlb at addr A > - clear pte at addr A| > - set new pte| mmu_notifier_invalidate_range_end | -flush IOMMU/device tlb > > Device is using old page and CPU new page :( That condition won't be persistent. > > But with ptep_clear_flush_notify() > > CPU | IOMMU >| - walk page table populate tlb at addr A > - clear pte at addr A| > - notify -> invalidate_range | > flush IOMMU/device tlb > - set new pte| > > So now either the IOMMU see the empty pte and trigger a page fault (this is > if there is a racing IOMMU ATS right after the IOMMU/device tlb flush but > before setting the new pte) or it see the new pte. Either way both IOMMU > and CPU have a coherent view of what a virtual address points to. Sure, the _notify version is obviously safe. > Andrea explained to me the historical reasons set_pte_at_notify call the > change_pte callback and it was intended so that KVM could update the > secondary page table directly without having to fault. It is now a pointless > optimization as the call to range_start() happening in all the places before > any set_pte_at_notify() invalidate the secondary page table and thus will > lead to page fault for the vm. 
I have talk with Andrea on way to bring back > this optimization. Yes, we known for a long time, the optimization got basically disabled when range_start/end expanded. It'd be nice to optimize change_pte again but this is for later. > Yes we need the following sequence for IOMMU: > - clear pte > - invalidate IOMMU/device TLB > - set new pte > > Otherwise the IOMMU page table walker can populate IOMMU/device tlb with > stall entry. > > Note that this is not necessary for all the case. For try_to_unmap it > is fine for instance to move the IOMMU tlb shoot down after changing the > CPU page table as we are not pointing the pte to a different page. Either > we clear the pte or we set a swap entry and as long as the page that use > to be pointed by the pte is not free before the IOMMU tlb flush then we > are fine. > > In fact i think the only case where we need the above sequence (clear, > flush secondary tlb, set new pte) is for COW. I think all other cases > we can get rid of invalidate_range() from inside the page table lock > and rely on invalidate_range_end() to call unconditionaly. Even with CoW, it's not big issue if the IOMMU keeps reading from the old page for a little while longer (in between PT lock release and mmu_notifier_invalidate_range_end). How can you tell you read the old data a bit longer, because it noticed the new page only when mmu_notifier_invalidate_range_end run, and not because the CPU was faster at writing than the IOMMU was fast at loading the new pagetable? I figure it would be detectable only that the CPU could see pageA changing before pageB. The iommu-v2 could see pageB changing before pageA. If that's a concern that is the only good reason I can think of right now, for requiring ->invalidate_page inside the CoW PT lock to enforce the same ordering. However if the modifications happens simultaneously and it's a correct runtime, the order must not matter, but still it would be detectable which may not be desirable. 
Currently the _notify is absolutely needed to run ->invalidate_range before put_page is run in the CoW code below, because of the put_page that is done in a not scalable place, it would be better moved down regardless of ->invalidate_range to reduce the size of the PT lock protected critical section. - if (new_page) - put_page(new_page); pte_unmap_unlock(vmf->pte, vmf->ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); + if (new_page) + put_page(new_page); Of course the iommu will not immediately start reading from the new page, but even if it triggers a write protect fault and calls handle_mm_fault, it will find a writeable pte already there and it'll get an ->invalidate_range as soon as mmu_notifier_invalidate_range_end runs so it can sure notice t
Re: [PATCH 04/12] x86: make dma_cache_sync a no-op
On Sun, 27 Aug 2017, Christoph Hellwig wrote: > x86 does not implement DMA_ATTR_NON_CONSISTENT allocations, so it doesn't > make any sense to do any work in dma_cache_sync given that it must be a > no-op when dma_alloc_attrs returns coherent memory. > > Signed-off-by: Christoph Hellwig > --- > arch/x86/include/asm/dma-mapping.h | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/arch/x86/include/asm/dma-mapping.h > b/arch/x86/include/asm/dma-mapping.h > index 398c79889f5c..04877267ad18 100644 > --- a/arch/x86/include/asm/dma-mapping.h > +++ b/arch/x86/include/asm/dma-mapping.h > @@ -70,7 +70,6 @@ static inline void > dma_cache_sync(struct device *dev, void *vaddr, size_t size, > enum dma_data_direction dir) > { > - flush_write_buffers(); > } > > static inline unsigned long dma_alloc_coherent_mask(struct device *dev, > -- > 2.11.0 > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFT] iommu/arm-smmu-v3: Use burst-polling for sync completion
While CMD_SYNC is unlikely to complete immediately such that we never go round the polling loop, with a lightly-loaded queue it may still do so long before the delay period is up. If we have no better completion notifier, use similar logic as we have for SMMUv2 to spin a number of times before each backoff, so that we have more chance of catching syncs which complete relatively quickly and avoid delaying unnecessarily. Signed-off-by: Robin Murphy --- This is mostly here for theoretical completeness - unless it proves to actually give a measurable benefit (I have no idea), I'd be inclined not to consider it for merging. drivers/iommu/arm-smmu-v3.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f5c5da553803..b92cd65f43f8 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -418,6 +418,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 #define ARM_SMMU_SYNC_TIMEOUT_US 100 /* 1s! */ +#define ARM_SMMU_SYNC_SPIN_COUNT 10 #define MSI_IOVA_BASE 0x800 #define MSI_IOVA_LENGTH0x10 @@ -998,7 +999,7 @@ static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx, ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); struct arm_smmu_queue *q = &smmu->cmdq.q; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); - unsigned int delay = 1; + unsigned int delay = 1, spin_cnt = 0; do { queue_sync_cons(q); @@ -1022,10 +1023,13 @@ static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx, if (wfe) { wfe(); - } else { + } else if (++spin_cnt < ARM_SMMU_SYNC_SPIN_COUNT) { cpu_relax(); + continue; + } else { udelay(delay); delay *= 2; + spin_cnt = 0; } } while (ktime_before(ktime_get(), timeout)); -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 4/4] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock
Even without the MSI trick, we can still do a lot better than hogging the entire queue while it drains. All we actually need to do for the necessary guarantee of completion is wait for our particular command to have been consumed, and as long as we keep track of where it is there is no need to block other CPUs from adding further commands in the meantime. There is one theoretical (but incredibly unlikely) edge case to avoid, where cons has wrapped twice to still appear 'behind' the sync position - this is easily disambiguated by adding a generation count to the queue to indicate when prod wraps, since cons cannot wrap twice without prod having wrapped at least once. Signed-off-by: Robin Murphy --- v2: New drivers/iommu/arm-smmu-v3.c | 81 - 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 311f482b93d5..f5c5da553803 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -417,7 +417,6 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 -#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 100 /* 1s! */ #define ARM_SMMU_SYNC_TIMEOUT_US 100 /* 1s! */ #define MSI_IOVA_BASE 0x800 @@ -540,6 +539,7 @@ struct arm_smmu_queue { struct arm_smmu_cmdq { struct arm_smmu_queue q; spinlock_t lock; + int generation; }; struct arm_smmu_evtq { @@ -770,21 +770,12 @@ static void queue_inc_prod(struct arm_smmu_queue *q) writel(q->prod, q->prod_reg); } -/* - * Wait for the SMMU to consume items. If drain is true, wait until the queue - * is empty. Otherwise, wait until there is at least one free slot. 
- */ -static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) +/* Wait for the SMMU to consume items, until there is at least one free slot */ +static int queue_poll_cons(struct arm_smmu_queue *q, bool wfe) { - ktime_t timeout; - unsigned int delay = 1; + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); - /* Wait longer if it's queue drain */ - timeout = ktime_add_us(ktime_get(), drain ? - ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : - ARM_SMMU_POLL_TIMEOUT_US); - - while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { + while (queue_sync_cons(q), queue_full(q)) { if (ktime_compare(ktime_get(), timeout) > 0) return -ETIMEDOUT; @@ -792,8 +783,7 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) wfe(); } else { cpu_relax(); - udelay(delay); - delay *= 2; + udelay(1); } } @@ -959,15 +949,20 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } -static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) +static u32 arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) { struct arm_smmu_queue *q = &smmu->cmdq.q; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); while (queue_insert_raw(q, cmd) == -ENOSPC) { - if (queue_poll_cons(q, false, wfe)) + if (queue_poll_cons(q, wfe)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); } + + if (Q_IDX(q, q->prod) == 0) + smmu->cmdq.generation++; + + return q->prod; } static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -997,15 +992,54 @@ static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) return (int)(val - sync_idx) < 0 ? 
-ETIMEDOUT : 0; } +static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx, + int sync_gen) +{ + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); + struct arm_smmu_queue *q = &smmu->cmdq.q; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + unsigned int delay = 1; + + do { + queue_sync_cons(q); + /* +* If we see updates quickly enough, cons has passed sync_idx, +* but not yet wrapped. At worst, cons might have actually +* wrapped an even number of times, but that still guarantees +* the original sync must have been consumed. +*/ + if (Q_IDX(q, q->cons) >= Q_IDX(q, sync_idx) && + Q_WRP(q, sync_idx) == Q_WRP(q, q->cons)) + return 0; + /* +* Otherwise, cons may have passed sync_idx and wrapped one or +* mo
[PATCH v2 3/4] iommu/arm-smmu-v3: Use CMD_SYNC completion MSI
As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least because we often need to wait for sync completion within someone else's IRQ handler anyway. However, when the SMMU is both coherent and supports MSIs, we can have a lot more fun by not using it as an interrupt at all. Following the example suggested in the architecture and using a write targeting normal memory, we can let callers wait on a status variable outside the lock instead of having to stall the entire queue or even touch MMIO registers. Since multiple sync commands are guaranteed to complete in order, a simple incrementing sequence count is all we need to unambiguously support any realistic number of overlapping waiters. Signed-off-by: Robin Murphy --- v2: Remove redundant 'bool msi' command member, other cosmetic tweaks drivers/iommu/arm-smmu-v3.c | 47 +++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f066725298cd..311f482b93d5 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -377,7 +377,16 @@ #define CMDQ_SYNC_0_CS_SHIFT 12 #define CMDQ_SYNC_0_CS_NONE(0UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) +#define CMDQ_SYNC_0_MSH_SHIFT 22 +#define CMDQ_SYNC_0_MSH_ISH(3UL << CMDQ_SYNC_0_MSH_SHIFT) +#define CMDQ_SYNC_0_MSIATTR_SHIFT 24 +#define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT) +#define CMDQ_SYNC_0_MSIDATA_SHIFT 32 +#define CMDQ_SYNC_0_MSIDATA_MASK 0xUL +#define CMDQ_SYNC_1_MSIADDR_SHIFT 0 +#define CMDQ_SYNC_1_MSIADDR_MASK 0xcUL /* Event queue */ #define EVTQ_ENT_DWORDS4 @@ -409,6 +418,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 100 /* 1s! */ +#define ARM_SMMU_SYNC_TIMEOUT_US 100 /* 1s! 
*/ #define MSI_IOVA_BASE 0x800 #define MSI_IOVA_LENGTH0x10 @@ -504,6 +514,10 @@ struct arm_smmu_cmdq_ent { } pri; #define CMDQ_OP_CMD_SYNC0x46 + struct { + u32 msidata; + u64 msiaddr; + } sync; }; }; @@ -617,6 +631,9 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; + atomic_tsync_nr; + u32 sync_count; + unsigned long ias; /* IPA */ unsigned long oas; /* PA */ unsigned long pgsize_bitmap; @@ -878,7 +895,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) } break; case CMDQ_OP_CMD_SYNC: - cmd[0] |= CMDQ_SYNC_0_CS_SEV; + if (ent->sync.msiaddr) + cmd[0] |= CMDQ_SYNC_0_CS_IRQ; + else + cmd[0] |= CMDQ_SYNC_0_CS_SEV; + cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB; + cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT; + cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; break; default: return -ENOENT; @@ -964,21 +987,40 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, spin_unlock_irqrestore(&smmu->cmdq.lock, flags); } +static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) +{ + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US); + u32 val = smp_cond_load_acquire(&smmu->sync_count, + (int)(VAL - sync_idx) >= 0 || + !ktime_before(ktime_get(), timeout)); + + return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0; +} + static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && + (smmu->features & ARM_SMMU_FEAT_COHERENCY); struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; int ret; + if (msi) { + ent.sync.msidata = atomic_inc_return(&smmu->sync_nr); + ent.sync.msiaddr = virt_to_phys(&smmu->sync_count); + } arm_smmu_cmdq_build_cmd(cmd, &ent); spin_lock_irqsave(&smmu->cmdq.lock, flags); arm_smmu_cmdq_insert_cmd(smmu, cmd); - ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + if
[PATCH v2 2/4] iommu/arm-smmu-v3: Forget about cmdq-sync interrupt
The cmdq-sync interrupt is never going to be particularly useful, since for stage 1 DMA at least we'll often need to wait for sync completion within someone else's IRQ handler, thus have to implement polling anyway. Beyond that, the overhead of taking an interrupt, then still having to grovel around in the queue to figure out *which* sync command completed, doesn't seem much more attractive than simple polling either. Furthermore, if an implementation both has wired interrupts and supports MSIs, then we don't want to be taking the IRQ unnecessarily if we're using the MSI write to update memory. Let's just make life simpler by not even bothering to claim it in the first place. Signed-off-by: Robin Murphy --- v2: No change drivers/iommu/arm-smmu-v3.c | 24 +--- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 91f28aca72c0..f066725298cd 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1296,12 +1296,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) return IRQ_HANDLED; } -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev) -{ - /* We don't actually use CMD_SYNC interrupts for anything */ - return IRQ_HANDLED; -} - static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) @@ -1334,10 +1328,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) if (active & GERROR_MSI_EVTQ_ABT_ERR) dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); - if (active & GERROR_MSI_CMDQ_ABT_ERR) { + if (active & GERROR_MSI_CMDQ_ABT_ERR) dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); - arm_smmu_cmdq_sync_handler(irq, smmu->dev); - } if (active & GERROR_PRIQ_ABT_ERR) dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); @@ -1366,7 +1358,6 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) static irqreturn_t arm_smmu_combined_irq_handler(int irq, 
void *dev) { arm_smmu_gerror_handler(irq, dev); - arm_smmu_cmdq_sync_handler(irq, dev); return IRQ_WAKE_THREAD; } @@ -2283,15 +2274,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "failed to enable evtq irq\n"); } - irq = smmu->cmdq.q.irq; - if (irq) { - ret = devm_request_irq(smmu->dev, irq, - arm_smmu_cmdq_sync_handler, 0, - "arm-smmu-v3-cmdq-sync", smmu); - if (ret < 0) - dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n"); - } - irq = smmu->gerr_irq; if (irq) { ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, @@ -2799,10 +2781,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (irq > 0) smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); if (irq > 0) smmu->gerr_irq = irq; -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/4] iommu/arm-smmu-v3: Specialise CMD_SYNC handling
CMD_SYNC already has a bit of special treatment here and there, but as we're about to extend it with more functionality for completing outside the CMDQ lock, things are going to get rather messy if we keep trying to cram everything into a single generic command interface. Instead, let's break out the issuing of CMD_SYNC into its own specific helper where upcoming changes will have room to breathe. Signed-off-by: Robin Murphy --- v2: Cosmetic changes to reduce churn in later patches drivers/iommu/arm-smmu-v3.c | 54 + 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 568c400eeaed..91f28aca72c0 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -936,13 +936,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } +static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) +{ + struct arm_smmu_queue *q = &smmu->cmdq.q; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + + while (queue_insert_raw(q, cmd) == -ENOSPC) { + if (queue_poll_cons(q, false, wfe)) + dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); + } +} + static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent) { u64 cmd[CMDQ_ENT_DWORDS]; unsigned long flags; - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); - struct arm_smmu_queue *q = &smmu->cmdq.q; if (arm_smmu_cmdq_build_cmd(cmd, ent)) { dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", @@ -951,16 +960,29 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, } spin_lock_irqsave(&smmu->cmdq.lock, flags); - while (queue_insert_raw(q, cmd) == -ENOSPC) { - if (queue_poll_cons(q, false, wfe)) - dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); - } - - if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe)) - dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); + arm_smmu_cmdq_insert_cmd(smmu, 
cmd); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); } +static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + unsigned long flags; + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; + int ret; + + arm_smmu_cmdq_build_cmd(cmd, &ent); + + spin_lock_irqsave(&smmu->cmdq.lock, flags); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + ret = queue_poll_cons(&smmu->cmdq.q, true, wfe); + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); + + if (ret) + dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); +} + /* Context descriptor manipulation functions */ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) { @@ -1029,8 +1051,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) }; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, @@ -1352,10 +1373,7 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { - struct arm_smmu_cmdq_ent cmd; - - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_sync(void *cookie) @@ -2399,8 +2417,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Invalidate any cached configuration */ cmd.opcode = CMDQ_OP_CFGI_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Invalidate any stale TLB entries */ if (smmu->features & ARM_SMMU_FEAT_HYP) { @@ -2410,8 +2427,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; arm_smmu_cmdq_issue_cmd(smmu, &cmd); - cmd.opcode = CMDQ_OP_CMD_SYNC; - 
arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_sync(smmu); /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 0/4] SMMUv3 CMD_SYNC optimisation
Hi all, Since Nate reported a reasonable performance boost from the out-of-line MSI polling in v1 [1], I've now implemented the equivalent for cons polling as well - that has been boot-tested on D05 with some trivial I/O and at least doesn't seem to lock up or explode. There's also a little cosmetic tweaking to make the patches a bit cleaner as a series. Robin. [1] https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19657.html Robin Murphy (5): iommu/arm-smmu-v3: Specialise CMD_SYNC handling iommu/arm-smmu-v3: Forget about cmdq-sync interrupt iommu/arm-smmu-v3: Use CMD_SYNC completion interrupt iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock iommu/arm-smmu-v3: Use burst-polling for sync completion drivers/iommu/arm-smmu-v3.c | 194 ++-- 1 file changed, 135 insertions(+), 59 deletions(-) -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 0/2] Dual MMU support for TI DRA7xx DSPs
Hi Joerg, The following two patches enhance the OMAP IOMMU driver to support mirror-programming of two MMUs present within the DSP subsystems specifically on TI DRA7xx/AM57xx family of SOCs. The TI OMAP DSP subsystems traditionally always have a DSP core and an internal EDMA block behind a single MMU within the subsystem, but DRA7xx DSP SoCs now separate these out and the blocks are behind an individual MMU and a dedicated port to connect to the interconnect. The DT usage for these will be of the form, dsp1: dsp@4080 { ... iommus = <&mmu0_dsp1>, <&mmu1_dsp1>; ... }; The series is based on your current 4.13 based arm/omap branch. The patches add the support in the driver, but the MMU devices themselves are still not created. There are two separate series (Add hwmod data for IPU & DSP processors/MMUs [1] and Enable DRA7 processor MMU DT nodes [2]) that add these patches, and will go through Tony and the linux-omap tree. The consumer nodes/client drivers are still in-progress and will depend on this series being merged first. Series is tested alongside these two additional series and adding the other consumer patches, no change in behavior with just these patches. 
regards Suman [1] https://marc.info/?l=linux-omap&m=150335933714097&w=2 [2] https://marc.info/?l=linux-omap&m=150335958414166&w=2 Following is an output of sysfs with these MMUs exercised with additional patches: root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/ drwxr-xr-x3 root root 0 Aug 8 03:59 0 drwxr-xr-x3 root root 0 Aug 8 03:59 1 drwxr-xr-x3 root root 0 Aug 8 03:59 2 drwxr-xr-x3 root root 0 Aug 8 03:59 3 drwxr-xr-x3 root root 0 Aug 8 03:59 4 drwxr-xr-x3 root root 0 Aug 8 03:59 5 root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/*/devices /sys/kernel/iommu_groups/0/devices: lrwxrwxrwx1 root root 0 Aug 8 03:59 4080.dsp -> ../../../../devices/platform/4400.ocp/4080.dsp /sys/kernel/iommu_groups/1/devices: /sys/kernel/iommu_groups/2/devices: lrwxrwxrwx1 root root 0 Aug 8 03:59 5882.ipu -> ../../../../devices/platform/4400.ocp/5882.ipu /sys/kernel/iommu_groups/3/devices: lrwxrwxrwx1 root root 0 Aug 8 03:59 5502.ipu -> ../../../../devices/platform/4400.ocp/5502.ipu /sys/kernel/iommu_groups/4/devices: lrwxrwxrwx1 root root 0 Aug 8 03:59 4100.dsp -> ../../../../devices/platform/4400.ocp/4100.dsp /sys/kernel/iommu_groups/5/devices: root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/*/devices/*/ /sys/kernel/iommu_groups/0/devices/4080.dsp/: lrwxrwxrwx1 root root 0 Aug 8 15:59 driver -> ../../../../bus/platform/drivers/omap-rproc -rw-r--r--1 root root 4096 Aug 8 03:59 driver_override lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu -> ../40d01000.mmu/iommu/40d01000.mmu lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu_group -> ../../../../kernel/iommu_groups/0 -r--r--r--1 root root 4096 Aug 8 03:59 modalias lrwxrwxrwx1 root root 0 Aug 8 03:59 of_node -> ../../../../firmware/devicetree/base/ocp/dsp@4080 drwxr-xr-x2 root root 0 Aug 8 03:59 power drwxr-xr-x3 root root 0 Aug 8 04:03 remoteproc lrwxrwxrwx1 root root 0 Aug 8 03:59 subsystem -> ../../../../bus/platform -rw-r--r--1 root root 4096 Aug 8 03:59 uevent /sys/kernel/iommu_groups/2/devices/5882.ipu/: lrwxrwxrwx1 root root 0 Aug 8 
15:59 driver -> ../../../../bus/platform/drivers/omap-rproc -rw-r--r--1 root root 4096 Aug 8 03:59 driver_override lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu -> ../58882000.mmu/iommu/58882000.mmu lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu_group -> ../../../../kernel/iommu_groups/2 -r--r--r--1 root root 4096 Aug 8 03:59 modalias lrwxrwxrwx1 root root 0 Aug 8 03:59 of_node -> ../../../../firmware/devicetree/base/ocp/ipu@5882 drwxr-xr-x2 root root 0 Aug 8 03:59 power drwxr-xr-x3 root root 0 Aug 8 04:03 remoteproc lrwxrwxrwx1 root root 0 Aug 8 03:59 subsystem -> ../../../../bus/platform -rw-r--r--1 root root 4096 Aug 8 03:59 uevent /sys/kernel/iommu_groups/3/devices/5502.ipu/: lrwxrwxrwx1 root root 0 Aug 8 15:59 driver -> ../../../../bus/platform/drivers/omap-rproc -rw-r--r--1 root root 4096 Aug 8 03:59 driver_override lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu -> ../55082000.mmu/iommu/55082000.mmu lrwxrwxrwx1 root root 0 Aug 8 03:59 iommu_group -
[PATCH 2/2] iommu/omap: Add support to program multiple iommus
A client user instantiates and attaches to an iommu_domain to program the OMAP IOMMU associated with the domain. The iommus programmed by a client user are bound with the iommu_domain through the user's device archdata. The OMAP IOMMU driver currently supports only one IOMMU per IOMMU domain per user. The OMAP IOMMU driver has been enhanced to support allowing multiple IOMMUs to be programmed by a single client user. This support is being added mainly to handle the DSP subsystems on the DRA7xx SoCs, which have two MMUs within the same subsystem. These MMUs provide translations to a processor core port and an internal EDMA port. This support allows both the MMUs to be programmed together, but with each one retaining its own internal state objects. The internal EDMA is managed by the software running on the DSPs, and this design provides on-par functionality with previous generation OMAP DSPs where the EDMA and the DSP core shared the same MMU. The multiple iommus are expected to be provided through a sentinel terminated array of omap_iommu_arch_data objects through the client user's device archdata. The OMAP driver core is enhanced to loop through the array of attached iommus and program them for all common operations. The sentinel-terminated logic is used so as to not change the omap_iommu_arch_data structure. NOTE: 1. The IOMMU groups are still defined for both the DSP MMUs, but the IOMMU device linking uses only the first MMU device. 2. The OMAP IOMMU debugfs code still continues to operate on individual IOMMU objects. 
Signed-off-by: Suman Anna [t-kri...@ti.com: ported support to 4.13 based kernel] Signed-off-by: Tero Kristo --- drivers/iommu/omap-iommu.c | 307 ++--- drivers/iommu/omap-iommu.h | 30 +++-- 2 files changed, 248 insertions(+), 89 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 81ef729994ce..9dd810f13c28 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -2,6 +2,7 @@ * omap iommu: tlb and pagetable primitives * * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/ * * Written by Hiroshi DOYU , * Paul Mundt and Toshihiro Kobayashi @@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) **/ void omap_iommu_save_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - p[i] = iommu_read_reg(obj, i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); @@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); **/ void omap_iommu_restore_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - iommu_write_reg(obj, p[i], i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while 
(arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); @@ -1068,11 +1089,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, phys_addr_t pa, size_t bytes, int prot) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; struct iotlb_entry e; int omap_pgsz; - u32 ret; + u32 ret = -EINVAL; +
[PATCH 1/2] iommu/omap: Change the attach detection logic
The OMAP IOMMU driver allows only a single device (eg: a rproc device) to be attached per domain. The current attach detection logic relies on a check for an attached iommu for the respective client device. Change this logic to use the client device pointer instead in preparation for supporting multiple iommu devices to be bound to a single iommu domain, and thereby to a client device. Signed-off-by: Suman Anna --- drivers/iommu/omap-iommu.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bd67e1b2c64e..81ef729994ce 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -805,7 +805,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) struct iommu_domain *domain = obj->domain; struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - if (!omap_domain->iommu_dev) + if (!omap_domain->dev) return IRQ_NONE; errs = iommu_report_fault(obj, &da); @@ -1118,8 +1118,8 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) spin_lock(&omap_domain->lock); - /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev) { + /* only a single client device can be attached to a domain */ + if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); ret = -EBUSY; goto out; @@ -1148,9 +1148,14 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, { struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + if (!omap_domain->dev) { + dev_err(dev, "domain has no attached device\n"); + return; + } + /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev != oiommu) { - dev_err(dev, "invalid iommu device\n"); + if (omap_domain->dev != dev) { + dev_err(dev, "invalid attached device\n"); return; } @@ -1219,7 +1224,7 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) * An iommu device is still attached * (currently, only one device can be 
attached) ? */ - if (omap_domain->iommu_dev) + if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); kfree(omap_domain->pgtable); -- 2.13.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2] iommu/vt-d: Don't be too aggressive when clearing one context entry
On Thu, 2017-08-31 at 10:58 +0200, Filippo Sironi wrote: > Previously, we were invalidating context cache and IOTLB globally when > clearing one context entry. This is a tad too aggressive. > Invalidate the context cache and IOTLB for the interested device only. > > Signed-off-by: Filippo Sironi > Cc: David Woodhouse > Cc: David Woodhouse > Cc: Joerg Roedel > Cc: Jacob Pan > Cc: iommu@lists.linux-foundation.org > Cc: linux-ker...@vger.kernel.org Acked-by: David Woodhouse I'd actually quite like to see us tidy this up some more. If there are aliases, we're doing the flush multiple times. It might be nicer to do no flushing in domain_contact_clear_one(), which is invoked for each alias, and just to update a (u16 *)did. If *did was empty, set it. If it was set (i.e. if there are aliases), then set it to a magic value like 0x. Then we can do the actual flush — either device-specific, or global — just once in domain_context_clear(). We'd probably just move domain_context_clear_one() into its only caller domain_context_clear_one_cb() while we're at it. But in the meantime I'm more than happy with this patch which turns multiple global flushes into multiple domain-specific flushes, and does the right thing in the common case. > --- v2: Changed subject, killed clear_context_table(). Anything else? 
> drivers/iommu/intel-iommu.c | 42 -- > 1 file changed, 24 insertions(+), 18 deletions(-) > > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 3e8636f1220e..1aa4ad7974b9 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -974,20 +974,6 @@ static int device_context_mapped(struct intel_iommu > *iommu, u8 bus, u8 devfn) > return ret; > } > > -static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) > -{ > - struct context_entry *context; > - unsigned long flags; > - > - spin_lock_irqsave(&iommu->lock, flags); > - context = iommu_context_addr(iommu, bus, devfn, 0); > - if (context) { > - context_clear_entry(context); > - __iommu_flush_cache(iommu, context, sizeof(*context)); > - } > - spin_unlock_irqrestore(&iommu->lock, flags); > -} > - > static void free_context_table(struct intel_iommu *iommu) > { > int i; > @@ -2351,13 +2337,33 @@ static inline int domain_pfn_mapping(struct > dmar_domain *domain, unsigned long i > > static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 > devfn) > { > + unsigned long flags; > + struct context_entry *context; > + u16 did_old; > + > if (!iommu) > return; > > - clear_context_table(iommu, bus, devfn); > - iommu->flush.flush_context(iommu, 0, 0, 0, > - DMA_CCMD_GLOBAL_INVL); > - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); > + spin_lock_irqsave(&iommu->lock, flags); > + context = iommu_context_addr(iommu, bus, devfn, 0); > + if (!context) { > + spin_unlock_irqrestore(&iommu->lock, flags); > + return; > + } > + did_old = context_domain_id(context); > + context_clear_entry(context); > + __iommu_flush_cache(iommu, context, sizeof(*context)); > + spin_unlock_irqrestore(&iommu->lock, flags); > + iommu->flush.flush_context(iommu, > + did_old, > + (((u16)bus) << 8) | devfn, Arguably we ought to be PCI_DEVID() there, but I feel bad pointing that out when you're adding about the fifth instance of open-coding it... 
:) -- dwmw2 smime.p7s Description: S/MIME cryptographic signature Amazon Web Services UK Limited. Registered in England and Wales with registration number 08650665 and which has its registered office at 60 Holborn Viaduct, London EC1A 2FD, United Kingdom. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device
Yes, I was running some tests against the new tree which didn't finish yesterday. Today I am traveling, but will be back im the evening and then I push the tree out. Regards, Joerg Sent from a Galaxy Class Phone Ursprüngliche Nachricht Von: Jon Hunter Datum: 31.08.17 12:23 (GMT+01:00) An: Joerg Roedel Cc: Thierry Reding , Robin Murphy , iommu@lists.linux-foundation.org, linux-te...@vger.kernel.org, linux-ker...@vger.kernel.org, Joerg Roedel Betreff: Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device Hi Joerg, On 30/08/17 16:30, Joerg Roedel wrote: > Hi Jon, > > On Wed, Aug 30, 2017 at 03:22:05PM +0100, Jon Hunter wrote: >> Yes I can confirm that this fixes the crash. I assume that you will fix >> the error path for bus_set_iommu() above as I believe now it needs to >> call iommu_device_sysfs_remove(). > > Thanks for testing the patch. I updated the error-path and put the > commit below into the iommu-tree: Today's -next is still failing and I don't see this fix in your public tree yet [0]. Can you push this out? Cheers Jon [0] https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/ -- nvpublic ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device
Hi Joerg, On 30/08/17 16:30, Joerg Roedel wrote: > Hi Jon, > > On Wed, Aug 30, 2017 at 03:22:05PM +0100, Jon Hunter wrote: >> Yes I can confirm that this fixes the crash. I assume that you will fix >> the error path for bus_set_iommu() above as I believe now it needs to >> call iommu_device_sysfs_remove(). > > Thanks for testing the patch. I updated the error-path and put the > commit below into the iommu-tree: Today's -next is still failing and I don't see this fix in your public tree yet [0]. Can you push this out? Cheers Jon [0] https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/ -- nvpublic ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching
On 2017/8/4 3:41, Nate Watterson wrote: > Hi Robin, > > On 7/31/2017 7:42 AM, Robin Murphy wrote: >> Hi Nate, >> >> On 29/07/17 04:57, Nate Watterson wrote: >>> Hi Robin, >>> I am seeing a crash when performing very basic testing on this series >>> with a Mellanox CX4 NIC. I dug into the crash a bit, and think this >>> patch is the culprit, but this rcache business is still mostly >>> witchcraft to me. >>> >>> # ifconfig eth5 up >>> # ifconfig eth5 down >>> Unable to handle kernel NULL pointer dereference at virtual address >>> 0020 >>> user pgtable: 64k pages, 48-bit VAs, pgd = 8007dbf47c00 >>> [0020] *pgd=0006efab0003, *pud=0006efab0003, >>> *pmd=0007d8720003, *pte= >>> Internal error: Oops: 9607 [#1] SMP >>> Modules linked in: >>> CPU: 47 PID: 5082 Comm: ifconfig Not tainted 4.13.0-rtp-enablement+ #3 >>> task: 8007da1e5780 task.stack: 8007ddcb8000 >>> PC is at __cached_rbnode_delete_update+0x2c/0x58 >>> LR is at private_free_iova+0x2c/0x60 >>> pc : [] lr : [] pstate: 204001c5 >>> sp : 8007ddcbba00 >>> x29: 8007ddcbba00 x28: 8007c8350210 >>> x27: 8007d1a8 x26: 8007dcc20800 >>> x25: 0140 x24: 8007c98f0008 >>> x23: fe4e x22: 0140 >>> x21: 8007c98f0008 x20: 8007c9adb240 >>> x19: 8007c98f0018 x18: 0010 >>> x17: x16: >>> x15: 4000 x14: >>> x13: x12: 0001 >>> x11: dead0200 x10: >>> x9 : x8 : 8007c9adb1c0 >>> x7 : 40002000 x6 : 00210d00 >>> x5 : x4 : c57e >>> x3 : ffcf x2 : ffcf >>> x1 : 8007c9adb240 x0 : >>> [...] 
>>> [] __cached_rbnode_delete_update+0x2c/0x58 >>> [] private_free_iova+0x2c/0x60 >>> [] iova_magazine_free_pfns+0x4c/0xa0 >>> [] free_iova_fast+0x1b0/0x230 >>> [] iommu_dma_free_iova+0x5c/0x80 >>> [] __iommu_dma_unmap+0x5c/0x98 >>> [] iommu_dma_unmap_resource+0x24/0x30 >>> [] iommu_dma_unmap_page+0xc/0x18 >>> [] __iommu_unmap_page+0x40/0x60 >>> [] mlx5e_page_release+0xbc/0x128 >>> [] mlx5e_dealloc_rx_wqe+0x30/0x40 >>> [] mlx5e_close_channel+0x70/0x1f8 >>> [] mlx5e_close_channels+0x2c/0x50 >>> [] mlx5e_close_locked+0x54/0x68 >>> [] mlx5e_close+0x30/0x58 >>> [...] >>> >>> ** Disassembly for __cached_rbnode_delete_update() near the fault ** >>>92|if (free->pfn_hi < iovad->dma_32bit_pfn) >>> 0852C6C4|ldr x3,[x1,#0x18]; x3,[free,#24] >>> 0852C6C8|ldr x2,[x0,#0x30]; x2,[iovad,#48] >>> 0852C6CC|cmp x3,x2 >>> 0852C6D0|b.cs0x0852C708 >>> |curr = &iovad->cached32_node; >>>94|if (!curr) >>> 0852C6D4|addsx19,x0,#0x18 ; x19,iovad,#24 >>> 0852C6D8|b.eq0x0852C708 >>> | >>> |cached_iova = rb_entry(*curr, struct iova, node); >>> | >>>99|if (free->pfn_lo >= cached_iova->pfn_lo) >>> 0852C6DC|ldr x0,[x19] ; xiovad,[curr] >>> 0852C6E0|ldr x2,[x1,#0x20]; x2,[free,#32] >>> 0852C6E4|ldr x0,[x0,#0x20]; x0,[x0,#32] >>> Apparently cached_iova was NULL so the pfn_lo access faulted. >>> >>> 0852C6E8|cmp x2,x0 >>> 0852C6EC|b.cc0x0852C6FC >>> 0852C6F0|mov x0,x1; x0,free >>> 100|*curr = rb_next(&free->node); >>> After instrumenting the code a bit, this seems to be the culprit. In the >>> previous call, free->pfn_lo was 0x_ which is actually the >>> dma_limit for the domain so rb_next() returns NULL. >>> >>> Let me know if you have any questions or would like additional tests >>> run. I also applied your "DMA domain debug info" patches and dumped the >>> contents of the domain at each of the steps above in case that would be >>> useful. If nothing else, they reinforce how thirsty the CX4 NIC is >>> especially when using 64k pages and many CPUs. 
>> >> Thanks for the report - I somehow managed to reason myself out of >> keeping the "no cached node" check in __cached_rbnode_delete_update() on >> the assumption that it must always be set by a previous allocation. >> However, there is indeed just one case case for which that fails: when >> you free any IOVA immediately after freeing the very topmost one. Which >> is something that freeing an entire magazine's worth of IOVAs back to >> the tree all at once has a very real chance of doing... >> >> The obvious
[PATCH v2] iommu/vt-d: Don't be too aggressive when clearing one context entry
Previously, we were invalidating context cache and IOTLB globally when clearing one context entry. This is a tad too aggressive. Invalidate the context cache and IOTLB for the interested device only. Signed-off-by: Filippo Sironi Cc: David Woodhouse Cc: David Woodhouse Cc: Joerg Roedel Cc: Jacob Pan Cc: iommu@lists.linux-foundation.org Cc: linux-ker...@vger.kernel.org --- drivers/iommu/intel-iommu.c | 42 -- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3e8636f1220e..1aa4ad7974b9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -974,20 +974,6 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) return ret; } -static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - struct context_entry *context; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); - context = iommu_context_addr(iommu, bus, devfn, 0); - if (context) { - context_clear_entry(context); - __iommu_flush_cache(iommu, context, sizeof(*context)); - } - spin_unlock_irqrestore(&iommu->lock, flags); -} - static void free_context_table(struct intel_iommu *iommu) { int i; @@ -2351,13 +2337,33 @@ static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long i static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) { + unsigned long flags; + struct context_entry *context; + u16 did_old; + if (!iommu) return; - clear_context_table(iommu, bus, devfn); - iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context) { + spin_unlock_irqrestore(&iommu->lock, flags); + return; + } + did_old = context_domain_id(context); + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + 
spin_unlock_irqrestore(&iommu->lock, flags); + iommu->flush.flush_context(iommu, + did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, +did_old, +0, +0, +DMA_TLB_DSI_FLUSH); } static inline void unlink_domain_info(struct device_domain_info *info) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] intel-iommu: Don't be too aggressive when clearing one context entry
Hi Jacob, > On 30. Aug 2017, at 17:50, Jacob Pan wrote: > > On Mon, 28 Aug 2017 16:16:29 +0200 > Filippo Sironi via iommu wrote: > >> Previously, we were invalidating context cache and IOTLB globally when >> clearing one context entry. This is a tad too aggressive. >> Invalidate the context cache and IOTLB for the interested device only. >> >> Signed-off-by: Filippo Sironi >> Cc: David Woodhouse >> Cc: David Woodhouse >> Cc: Joerg Roedel >> Cc: iommu@lists.linux-foundation.org >> Cc: linux-ker...@vger.kernel.org >> --- >> drivers/iommu/intel-iommu.c | 25 ++--- >> 1 file changed, 22 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c >> index 3e8636f1220e..4bf3e59b0929 100644 >> --- a/drivers/iommu/intel-iommu.c >> +++ b/drivers/iommu/intel-iommu.c >> @@ -2351,13 +2351,32 @@ static inline int domain_pfn_mapping(struct >> dmar_domain *domain, unsigned long i >> static void domain_context_clear_one(struct intel_iommu *iommu, u8 >> bus, u8 devfn) { >> +unsigned long flags; >> +struct context_entry *context; >> +u16 did_old; >> + >> if (!iommu) >> return; >> >> +spin_lock_irqsave(&iommu->lock, flags); >> +context = iommu_context_addr(iommu, bus, devfn, 0); >> +if (!context) { >> +spin_unlock_irqrestore(&iommu->lock, flags); >> +return; >> +} > perhaps check with device_context_mapped()? Using device_context_mapped() wouldn't simplify the code since it would just tell me that at the time of check there was a context. I would still need to lock, get the context, check if the context is valid, do the work, and unlock. Modifying device_context_mapped() to return the context isn't going to work either because it may go away in the meantime since I wouldn't hold the lock. 
>> +did_old = context_domain_id(context); >> +spin_unlock_irqrestore(&iommu->lock, flags); >> clear_context_table(iommu, bus, devfn); >> -iommu->flush.flush_context(iommu, 0, 0, 0, >> - DMA_CCMD_GLOBAL_INVL); >> -iommu->flush.flush_iotlb(iommu, 0, 0, 0, >> DMA_TLB_GLOBAL_FLUSH); >> +iommu->flush.flush_context(iommu, >> + did_old, >> + (((u16)bus) << 8) | devfn, >> + DMA_CCMD_MASK_NOBIT, >> + DMA_CCMD_DEVICE_INVL); >> +iommu->flush.flush_iotlb(iommu, >> + did_old, >> + 0, >> + 0, >> + DMA_TLB_DSI_FLUSH); >> } >> >> static inline void unlink_domain_info(struct device_domain_info >> *info) > > [Jacob Pan] Regards, Filippo Amazon Development Center Germany GmbH Berlin - Dresden - Aachen main office: Krausenstr. 38, 10117 Berlin Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger Ust-ID: DE289237879 Eingetragen am Amtsgericht Charlottenburg HRB 149173 B ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] intel-iommu: Don't be too aggressive when clearing one context entry
Hi Joerg, > On 30. Aug 2017, at 15:31, Joerg Roedel wrote: > > Hi Filippo, > > please change the subject to: > > iommu/vt-d: Don't be too aggressive when clearing one context entry > > to follow the convention used in the iommu-tree. Another comment below. Will do. > On Mon, Aug 28, 2017 at 04:16:29PM +0200, Filippo Sironi wrote: >> static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 >> devfn) >> { >> +unsigned long flags; >> +struct context_entry *context; >> +u16 did_old; >> + >> if (!iommu) >> return; >> >> +spin_lock_irqsave(&iommu->lock, flags); >> +context = iommu_context_addr(iommu, bus, devfn, 0); >> +if (!context) { >> +spin_unlock_irqrestore(&iommu->lock, flags); >> +return; >> +} >> +did_old = context_domain_id(context); >> +spin_unlock_irqrestore(&iommu->lock, flags); >> clear_context_table(iommu, bus, devfn); > > This function is the only caller of clear_context_table(), which does > similar things (like fetching the context-entry) as you are adding > above. > > So you can either make clear_context_table() return the old domain-id > so that you don't need to do it here, or you get rid of the function > entirely and add the context_clear_entry() and __iommu_flush_cache() > calls into this code-path. > > Regards, > > Joerg I went for merging domain_context_clear_one() with context_clear_one(). Regards, Filippo Amazon Development Center Germany GmbH Berlin - Dresden - Aachen main office: Krausenstr. 38, 10117 Berlin Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger Ust-ID: DE289237879 Eingetragen am Amtsgericht Charlottenburg HRB 149173 B ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 4/6] iommu/arm-smmu-v3: Add SVM support for platform devices
From: Jean-Philippe Brucker Platform device can realise SVM function by using the stall mode. That is to say, when device access a memory via iova which is not populated, it will stalled and when SMMU try to translate this iova, it also will stall and meanwhile send an event to CPU via MSI. After SMMU driver handle the event and populated the iova, it will send a RESUME command to SMMU to exit the stall mode, therefore the platform device can contiue access the memory. Signed-off-by: Jean-Philippe Brucker --- drivers/iommu/arm-smmu-v3.c | 218 1 file changed, 181 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index cafbeef..d44256a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -359,6 +359,7 @@ #define CTXDESC_CD_0_TCR_HA_SHIFT 43 #define CTXDESC_CD_0_HD(1UL << CTXDESC_CD_0_TCR_HD_SHIFT) #define CTXDESC_CD_0_HA(1UL << CTXDESC_CD_0_TCR_HA_SHIFT) +#define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) #define CTXDESC_CD_0_A (1UL << 46) #define CTXDESC_CD_0_ASET_SHIFT47 @@ -432,6 +433,15 @@ #define CMDQ_PRI_1_RESP_FAIL (1UL << CMDQ_PRI_1_RESP_SHIFT) #define CMDQ_PRI_1_RESP_SUCC (2UL << CMDQ_PRI_1_RESP_SHIFT) +#define CMDQ_RESUME_0_SID_SHIFT32 +#define CMDQ_RESUME_0_SID_MASK 0xUL +#define CMDQ_RESUME_0_ACTION_SHIFT 12 +#define CMDQ_RESUME_0_ACTION_TERM (0UL << CMDQ_RESUME_0_ACTION_SHIFT) +#define CMDQ_RESUME_0_ACTION_RETRY (1UL << CMDQ_RESUME_0_ACTION_SHIFT) +#define CMDQ_RESUME_0_ACTION_ABORT (2UL << CMDQ_RESUME_0_ACTION_SHIFT) +#define CMDQ_RESUME_1_STAG_SHIFT 0 +#define CMDQ_RESUME_1_STAG_MASK0xUL + #define CMDQ_SYNC_0_CS_SHIFT 12 #define CMDQ_SYNC_0_CS_NONE(0UL << CMDQ_SYNC_0_CS_SHIFT) #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) @@ -443,6 +453,31 @@ #define EVTQ_0_ID_SHIFT0 #define EVTQ_0_ID_MASK 0xffUL +#define EVT_ID_TRANSLATION_FAULT 0x10 +#define EVT_ID_ADDR_SIZE_FAULT 0x11 +#define EVT_ID_ACCESS_FAULT0x12 +#define EVT_ID_PERMISSION_FAULT0x13 + 
+#define EVTQ_0_SSV (1UL << 11) +#define EVTQ_0_SSID_SHIFT 12 +#define EVTQ_0_SSID_MASK 0xfUL +#define EVTQ_0_SID_SHIFT 32 +#define EVTQ_0_SID_MASK0xUL +#define EVTQ_1_STAG_SHIFT 0 +#define EVTQ_1_STAG_MASK 0xUL +#define EVTQ_1_STALL (1UL << 31) +#define EVTQ_1_PRIV(1UL << 33) +#define EVTQ_1_EXEC(1UL << 34) +#define EVTQ_1_READ(1UL << 35) +#define EVTQ_1_S2 (1UL << 39) +#define EVTQ_1_CLASS_SHIFT 40 +#define EVTQ_1_CLASS_MASK 0x3UL +#define EVTQ_1_TT_READ (1UL << 44) +#define EVTQ_2_ADDR_SHIFT 0 +#define EVTQ_2_ADDR_MASK 0xUL +#define EVTQ_3_IPA_SHIFT 12 +#define EVTQ_3_IPA_MASK0xffUL + /* PRI queue */ #define PRIQ_ENT_DWORDS2 #define PRIQ_MAX_SZ_SHIFT 8 @@ -586,6 +621,13 @@ struct arm_smmu_cmdq_ent { enum fault_status resp; } pri; + #define CMDQ_OP_RESUME 0x44 + struct { + u32 sid; + u16 stag; + enum fault_status resp; + } resume; + #define CMDQ_OP_CMD_SYNC0x46 }; }; @@ -659,6 +701,7 @@ struct arm_smmu_s1_cfg { struct list_headcontexts; size_t num_contexts; + boolcan_stall; struct arm_smmu_ctx_desccd; /* Default context (SSID0) */ }; @@ -682,6 +725,7 @@ struct arm_smmu_strtab_ent { struct arm_smmu_s2_cfg *s2_cfg; boolprg_response_needs_ssid; + boolcan_stall; }; struct arm_smmu_strtab_cfg { @@ -816,6 +860,7 @@ struct arm_smmu_fault { boolpriv; boollast; + boolstall; struct work_struct work; }; @@ -1098,6 +1143,21 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) return -EINVAL; } break; + case CMDQ_OP_RESUME: + switch (ent->resume.resp) { + case ARM_SMMU_FAULT_FAIL: +
[RFC PATCH 1/6] dt-bindings: document stall and PASID properties for IOMMU masters
From: Jean-Philippe Brucker Document the bindings for stall and PASID capable platform devices. Signed-off-by: Jean-Philippe Brucker --- Documentation/devicetree/bindings/iommu/iommu.txt | 13 + 1 file changed, 13 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt index 5a8b462..7355d7f 100644 --- a/Documentation/devicetree/bindings/iommu/iommu.txt +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -86,6 +86,19 @@ have a means to turn off translation. But it is invalid in such cases to disable the IOMMU's device tree node in the first place because it would prevent any driver from properly setting up the translations. +Optional properties: + +- dma-can-stall: When present, the master can wait for a DMA transaction + to be handled by the IOMMU and can recover from a fault. For example, if + the page accessed by the DMA transaction isn't mapped, some IOMMUs are + able to stall the transaction and let the OS populate the page tables. + The IOMMU then performs the transaction if the fault was successfully + handled, or aborts the transaction otherwise. + +- pasid-bits: Some masters support multiple address spaces for DMA. By + tagging DMA transactions with an address space identifier. By default, + this is 0, which means that the device only has one address space. + Notes: == -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 3/6] ACPI: IORT: Add stall and pasid properties to iommu_fwspec
According to ACPI IORT spec, named component specific data has a node flags field whoes bit0 is for Stall support. However, it do not have any field for pasid bit. As PCIe SMMU support 20 pasid bits, this patch suggest to use 5 bits[5:1] in node flags field for pasid bits which means we can have 32 pasid bits. And this should be enough for platform device. Anyway, this is just a RFC. Signed-off-by: Yisheng Xie --- drivers/acpi/arm64/iort.c | 20 include/acpi/actbl2.h | 5 + 2 files changed, 25 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4d9ccdd0..514caca3 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -710,6 +710,22 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED; } +static bool iort_platform_support_stall(struct acpi_iort_node *node) +{ + struct acpi_iort_named_component *ncomp; + + ncomp = (struct acpi_iort_named_component *)node->node_data; + return ncomp->node_flags & ACPI_IORT_STALL_SUPPORTED; +} + +static unsigned long iort_platform_get_pasid_bits(struct acpi_iort_node *node) +{ + struct acpi_iort_named_component *ncomp; + + ncomp = (struct acpi_iort_named_component *)node->node_data; + return (ncomp->node_flags & ACPI_IORT_PASID_BITS_MASK) >> ACPI_IORT_PASID_BITS_SHIFT; +} + /** * iort_iommu_configure - Set-up IOMMU configuration for a device. 
* @@ -772,6 +788,10 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) IORT_IOMMU_TYPE, i++); } + if (!IS_ERR_OR_NULL(ops)) { + dev->iommu_fwspec->can_stall = iort_platform_support_stall(node); + dev->iommu_fwspec->num_pasid_bits = iort_platform_get_pasid_bits(node); + } } /* diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 707dda74..125b150 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -749,6 +749,11 @@ struct acpi_iort_named_component { char device_name[1];/* Path of namespace object */ }; +#define ACPI_IORT_STALL_SUPPORTED 0x0001 /* The platform device supports stall */ +#define ACPI_IORT_STALL_UNSUPPORTED0x /* The platform device doesn't support stall */ +#define ACPI_IORT_PASID_BITS_MASK 0x003e /* 5 bits for PASID BITS */ +#define ACPI_IORT_PASID_BITS_SHIFT 1 /* PASID BITS numbers shift */ + struct acpi_iort_root_complex { u64 memory_properties; /* Memory access properties */ u32 ats_attribute; -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 2/6] iommu/of: Add stall and pasid properties to iommu_fwspec
From: Jean-Philippe Brucker Add stall and pasid properties to iommu_fwspec. Signed-off-by: Jean-Philippe Brucker --- drivers/iommu/of_iommu.c | 11 +++ include/linux/iommu.h| 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 84fa6b4..b926276 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -209,6 +209,16 @@ static bool of_pci_rc_supports_ats(struct device_node *rc_node) break; } + if (dev->iommu_fwspec) { + const __be32 *prop; + + if (of_get_property(np, "dma-can-stall", NULL)) + dev->iommu_fwspec->can_stall = true; + + prop = of_get_property(np, "pasid-bits", NULL); + if (prop) + dev->iommu_fwspec->num_pasid_bits = be32_to_cpu(*prop); + } + return ops; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1c5a216..5f580de 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -387,6 +387,8 @@ struct iommu_fwspec { void*iommu_priv; u32 flags; unsigned intnum_ids; + unsigned long num_pasid_bits; + boolcan_stall; u32 ids[1]; }; -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 5/6] iommu/arm-smmu-v3: fix panic when handle stall mode irq
When SMMU do not support SVM feature, however the master support SVM, which means matser can stall and with mult-pasid number, then the user can bind a task to device using API like iommu_bind_task(). however, when device trigger a stall mode fault i will cause panic: [ 106.996087] Unable to handle kernel NULL pointer dereference at virtual address 0100 [ 106.996122] user pgtable: 4k pages, 48-bit VAs, pgd = 80003e023000 [ 106.996150] [0100] *pgd=3e04a003, *pud=3e04b003, *pmd= [ 106.996201] Internal error: Oops: 9606 [#1] PREEMPT SM [ 106.996224] Modules linked in: [ 106.996256] CPU: 0 PID: 916 Comm: irq/14-arm-smmu Not tainted 4.13.0-rc5-00035-g1235ddd-dirty #67 [ 106.996288] Hardware name: Hisilicon PhosphorHi1383 ESL (DT) [ 106.996317] task: 80003adc1c00 task.stack: 80003a9f8000 [ 106.996347] PC is at __queue_work+0x30/0x3a8 [ 106.996374] LR is at queue_work_on+0x60/0x78 [ 106.996401] pc : [] lr : [] pstate: 40c001c9 [ 106.996430] sp : 80003a9fbc20 [ 106.996451] x29: 80003a9fbc20 x28: 80003adc1c00 [ 106.996488] x27: 08d05080 x26: 80003ab0e028 [ 106.996526] x25: 80003a9900ac x24: 0001 [ 106.996562] x23: 0040 x22: [ 106.996598] x21: x20: 0140 [ 106.996634] x19: 80003ab0e028 x18: 0010 [ 106.996670] x17: a52a5040 x16: 0820f260 [ 106.996708] x15: 0018e97629e0 x14: 80003fb89468 [ 106.996744] x13: x12: 80003abb0600 [ 106.996781] x11: x10: 01010100 [ 106.996817] x9 : 85de5010 x8 : e4830001 [ 106.996854] x7 : 80003a9fbcf8 x6 : 000fffe0 [ 106.996890] x5 : x4 : 0001 [ 106.996926] x3 : x2 : 80003ab0e028 [ 106.996962] x1 : x0 : 01c0 [ 106.997002] Process irq/14-arm-smmu (pid: 916, stack limit =0x80003a9f8000) [ 106.997035] Stack: (0x80003a9fbc20 to 0x80003a9fc000) [...] 
[ 106.998366] Call trace: [ 106.998842] [] __queue_work+0x30/0x3a8 [ 106.998874] [] queue_work_on+0x60/0x78 [ 106.998912] [] arm_smmu_handle_stall+0x104/0x138 [ 106.998952] [] arm_smmu_evtq_thread+0xc0/0x158 [ 106.998989] [] irq_thread_fn+0x28/0x68 [ 106.999025] [] irq_thread+0x128/0x1d0 [ 106.999060] [] kthread+0xfc/0x128 [ 106.999093] [] ret_from_fork+0x10/0x50 [ 106.999130] Code: a90153f3 a90573fb d53b4220 363814c0 (b94102a0) [ 106.999159] ---[ end trace 7e5c9f0cb1f2fecd ]--- And the resean is we donot init fault_queue while the fault handle need to use it. Fix by return -EINVAL in arm_smmu_bind_task() when smmu do not support the feature of SVM. Signed-off-by: Yisheng Xie --- drivers/iommu/arm-smmu-v3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d44256a..dbda2eb 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2922,6 +2922,8 @@ static int arm_smmu_bind_task(struct device *dev, struct task_struct *task, return -EINVAL; smmu = master->smmu; + if (!(smmu->features & ARM_SMMU_FEAT_SVM)) + return -EINVAL; domain = iommu_get_domain_for_dev(dev); if (WARN_ON(!domain)) -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 0/6] Add platform device SVM support for ARM SMMUv3
Jean-Philippe has posted a patchset for adding PCIe SVM support to ARM SMMUv3: https://www.spinics.net/lists/arm-kernel/msg565155.html But for some platform devices (aka on-chip integrated devices), there is also an SVM requirement, which works based on the SMMU stall mode. Jean-Philippe has prepared a prototype patchset to support it: git://linux-arm.org/linux-jpb.git svm/stall We tested this patchset with some fixes on an on-chip integrated device. The basic function is ok, so I just send them out for review, although this patchset heavily depends on the former patchset (PCIe SVM support for ARM SMMUv3), which is still under discussion. Patch Overview: *1 to 3 prepare for device tree or ACPI to get the device stall ability and pasid bits *4 realises the SVM function for platform devices *5 fixes a bug when testing the SVM function while the SMMU does not support this feature *6 avoids ILLEGAL settings of the STE and CD entry regarding stall Actually here, I also have a question about SVM on SMMUv3: 1. Why does the SVM feature on SMMUv3 depend on the BTM feature? When binding a task to a device, it will register an mmu_notifier. Therefore, when a page range is invalidated, we can send TLBI or ATC invalidations without BTM? 2. According to the ACPI IORT spec, named component specific data has a node flags field whose bit0 is for Stall support. However, it does not have any field for pasid bits. Can we use the other 5 bits [5:1] for the pasid bit number, so we can have 32 pasid bits for a single platform device, which should be enough, because the SMMU only supports 20 pasid bits? 3. Presently, the pasid is allocated for a task but not for a context; if a task is trying to bind to 2 devices A and B: a) A supports 5 pasid bits b) B supports 2 pasid bits c) when the task binds to device A, it allocates pasid = 16 d) then it must fail when trying to bind to device B, for its highest pasid is 4. So should it allocate a single pasid per context to avoid this? 
Jean-Philippe Brucker (3): dt-bindings: document stall and PASID properties for IOMMU masters iommu/of: Add stall and pasid properties to iommu_fwspec iommu/arm-smmu-v3: Add SVM support for platform devices Yisheng Xie (3): ACPI: IORT: Add stall and pasid properties to iommu_fwspec iommu/arm-smmu-v3: fix panic when handle stall mode irq iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD and CD.S Documentation/devicetree/bindings/iommu/iommu.txt | 13 ++ drivers/acpi/arm64/iort.c | 20 ++ drivers/iommu/arm-smmu-v3.c | 230 ++ drivers/iommu/of_iommu.c | 11 + include/acpi/actbl2.h | 5 + include/linux/iommu.h | 2 + 6 files changed, 244 insertions(+), 37 deletions(-) -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH 6/6] iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD and CD.S
It is ILLEGAL to set STE.S1STALLD if STALL_MODEL is not 0b00, which means we should not disable stall mode if stall/terminate mode is not configuable. Meanwhile, it is also ILLEGAL when STALL_MODEL==0b10 && CD.S==0 which means if stall mode is force we should always set CD.S. This patch add ARM_SMMU_FEAT_TERMINATE feature bit for smmu, and use TERMINATE feature checking to ensue above ILLEGAL cases from happening. Signed-off-by: Yisheng Xie --- drivers/iommu/arm-smmu-v3.c | 22 -- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index dbda2eb..0745522 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -55,6 +55,7 @@ #define IDR0_STALL_MODEL_SHIFT 24 #define IDR0_STALL_MODEL_MASK 0x3 #define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT) +#define IDR0_STALL_MODEL_NS(1 << IDR0_STALL_MODEL_SHIFT) #define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT) #define IDR0_TTENDIAN_SHIFT21 #define IDR0_TTENDIAN_MASK 0x3 @@ -766,6 +767,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_SVM (1 << 15) #define ARM_SMMU_FEAT_HA (1 << 16) #define ARM_SMMU_FEAT_HD (1 << 17) +#define ARM_SMMU_FEAT_TERMINATE(1 << 18) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -1402,6 +1404,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, u64 val; bool cd_live; __u64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + struct arm_smmu_device *smmu = smmu_domain->smmu; /* * This function handles the following cases: @@ -1468,9 +1471,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, CTXDESC_CD_0_V; /* -* FIXME: STALL_MODEL==0b10 && CD.S==0 is ILLEGAL +* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (ssid && smmu_domain->s1_cfg.can_stall) + if ((ssid && smmu_domain->s1_cfg.can_stall) || + (!(smmu->features & ARM_SMMU_FEAT_TERMINATE) && + smmu->features & ARM_SMMU_FEAT_STALLS)) val |= CTXDESC_CD_0_S; cdptr[0] = 
cpu_to_le64(val); @@ -1690,12 +1695,13 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, dst[1] |= STRTAB_STE_1_PPAR; /* -* FIXME: it is illegal to set S1STALLD if STALL_MODEL=0b10 -* (force). But according to the spec, it *must* be set for +* According to spec, it is illegal to set S1STALLD if +* STALL_MODEL is not 0b00. And it *must* be set for * devices that aren't capable of stalling (notably pci!) -* So we need a "STALL_MODEL=0b00" feature bit. */ - if (smmu->features & ARM_SMMU_FEAT_STALLS && !ste->can_stall) + if (smmu->features & ARM_SMMU_FEAT_STALLS && + smmu->features & ARM_SMMU_FEAT_TERMINATE && + !ste->can_stall) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (s1ctxptr & STRTAB_STE_0_S1CTXPTR_MASK @@ -4577,9 +4583,13 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { case IDR0_STALL_MODEL_STALL: + smmu->features |= ARM_SMMU_FEAT_TERMINATE; /* Fallthrough */ case IDR0_STALL_MODEL_FORCE: smmu->features |= ARM_SMMU_FEAT_STALLS; + break; + case IDR0_STALL_MODEL_NS: + smmu->features |= ARM_SMMU_FEAT_TERMINATE; } if (reg & IDR0_S1P) -- 1.7.12.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu