Re: [Freedreno] [PATCH v2] drm/msm/iommu: optimize map/unmap
On Tue, Aug 23, 2022 at 2:37 PM Akhil P Oommen wrote: > > On 8/23/2022 10:07 PM, Rob Clark wrote: > > From: Rob Clark > > > > Using map_pages/unmap_pages cuts down on the # of pgtable walks needed > > in the process of finding where to insert/remove an entry. The end > > result is ~5-10x faster than mapping a single page at a time. > > > > v2: Rename iommu_pgsize(), drop obsolete comments, fix error handling > > in msm_iommu_pagetable_map() > > > > Signed-off-by: Rob Clark > > --- > > drivers/gpu/drm/msm/msm_iommu.c | 101 +++- > > 1 file changed, 86 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/msm/msm_iommu.c > > b/drivers/gpu/drm/msm/msm_iommu.c > > index a54ed354578b..5577cea7c009 100644 > > --- a/drivers/gpu/drm/msm/msm_iommu.c > > +++ b/drivers/gpu/drm/msm/msm_iommu.c > > @@ -21,6 +21,7 @@ struct msm_iommu_pagetable { > > struct msm_mmu base; > > struct msm_mmu *parent; > > struct io_pgtable_ops *pgtbl_ops; > > + unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */ > > phys_addr_t ttbr; > > u32 asid; > > }; > > @@ -29,23 +30,84 @@ static struct msm_iommu_pagetable *to_pagetable(struct > > msm_mmu *mmu) > > return container_of(mmu, struct msm_iommu_pagetable, base); > > } > > > > +/* based on iommu_pgsize() in iommu.c: */ > > +static size_t calc_pgsize(struct msm_iommu_pagetable *pagetable, > > +unsigned long iova, phys_addr_t paddr, > > +size_t size, size_t *count) > > +{ > > + unsigned int pgsize_idx, pgsize_idx_next; > > + unsigned long pgsizes; > > + size_t offset, pgsize, pgsize_next; > > + unsigned long addr_merge = paddr | iova; > > + > > + /* Page sizes supported by the hardware and small enough for @size */ > > + pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0); > > + > > + /* Constrain the page sizes further based on the maximum alignment */ > > + if (likely(addr_merge)) > > + pgsizes &= GENMASK(__ffs(addr_merge), 0); > > + > > + /* Make sure we have at least one suitable page size */ > > + BUG_ON(!pgsizes); 
> > + > > + /* Pick the biggest page size remaining */ > > + pgsize_idx = __fls(pgsizes); > > + pgsize = BIT(pgsize_idx); > > + if (!count) > > + return pgsize; > > + > > + /* Find the next biggest support page size, if it exists */ > > + pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); > > + if (!pgsizes) > > + goto out_set_count; > > + > > + pgsize_idx_next = __ffs(pgsizes); > > + pgsize_next = BIT(pgsize_idx_next); > > + > > + /* > > + * There's no point trying a bigger page size unless the virtual > > + * and physical addresses are similarly offset within the larger page. > > + */ > > + if ((iova ^ paddr) & (pgsize_next - 1)) > > + goto out_set_count; > > + > > + /* Calculate the offset to the next page size alignment boundary */ > > + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); > > + > > + /* > > + * If size is big enough to accommodate the larger page, reduce > > + * the number of smaller pages. > > + */ > > + if (offset + pgsize_next <= size) > > + size = offset; > > + > > +out_set_count: > > + *count = size >> pgsize_idx; > > + return pgsize; > > +} > > + > Can we keep this in iommu driver? Seems useful to other drivers too. This might end up being only temporary.. Robin had the idea of adding a private way to create "dummy" iommu_domain's which we could use instead of the pgtbl ops directly. On one hand, it would simplify this quite a bit. On the other hand it would force powering up (at least the SMMU) for unmaps/maps, and make it harder to do things like this: https://patchwork.freedesktop.org/patch/498660/?series=107536&rev=1 > Perhaps implement an sg friendly version of iopgtble ops, like > unmap_sg() maybe! Probably not a good idea to push more into the iopgtbl implementations.. __iommu_map_sg() does have a bit of cleverness, but that shouldn't really be required if we get our sg from drm_prime_pages_to_sg() since sg_alloc_append_table_from_pages() already performs a similar merging. BR, -R > > -Akhil. 
> > static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova, > > size_t size) > > { > > struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); > > struct io_pgtable_ops *ops = pagetable->pgtbl_ops; > > - size_t unmapped = 0; > > > > - /* Unmap the block one page at a time */ > > while (size) { > > - unmapped += ops->unmap(ops, iova, 4096, NULL); > > - iova += 4096; > > - size -= 4096; > > + size_t unmapped, pgsize, count; > > + > > + pgsize = calc_pgsize(pagetable, iova, iova, size, &count); > > + > > + unmapped = ops->unmap_pages(ops, iova, pgsize,
Re: [Freedreno] [PATCH v2] drm/msm/iommu: optimize map/unmap
On 8/23/2022 10:07 PM, Rob Clark wrote: From: Rob Clark Using map_pages/unmap_pages cuts down on the # of pgtable walks needed in the process of finding where to insert/remove an entry. The end result is ~5-10x faster than mapping a single page at a time. v2: Rename iommu_pgsize(), drop obsolete comments, fix error handling in msm_iommu_pagetable_map() Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_iommu.c | 101 +++- 1 file changed, 86 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index a54ed354578b..5577cea7c009 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,7 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; }; @@ -29,23 +30,84 @@ static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) return container_of(mmu, struct msm_iommu_pagetable, base); } +/* based on iommu_pgsize() in iommu.c: */ +static size_t calc_pgsize(struct msm_iommu_pagetable *pagetable, + unsigned long iova, phys_addr_t paddr, + size_t size, size_t *count) +{ + unsigned int pgsize_idx, pgsize_idx_next; + unsigned long pgsizes; + size_t offset, pgsize, pgsize_next; + unsigned long addr_merge = paddr | iova; + + /* Page sizes supported by the hardware and small enough for @size */ + pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0); + + /* Constrain the page sizes further based on the maximum alignment */ + if (likely(addr_merge)) + pgsizes &= GENMASK(__ffs(addr_merge), 0); + + /* Make sure we have at least one suitable page size */ + BUG_ON(!pgsizes); + + /* Pick the biggest page size remaining */ + pgsize_idx = __fls(pgsizes); + pgsize = BIT(pgsize_idx); + if (!count) + return pgsize; + + /* Find the next biggest support page size, if it exists */ + pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 
0); + if (!pgsizes) + goto out_set_count; + + pgsize_idx_next = __ffs(pgsizes); + pgsize_next = BIT(pgsize_idx_next); + + /* +* There's no point trying a bigger page size unless the virtual +* and physical addresses are similarly offset within the larger page. +*/ + if ((iova ^ paddr) & (pgsize_next - 1)) + goto out_set_count; + + /* Calculate the offset to the next page size alignment boundary */ + offset = pgsize_next - (addr_merge & (pgsize_next - 1)); + + /* +* If size is big enough to accommodate the larger page, reduce +* the number of smaller pages. +*/ + if (offset + pgsize_next <= size) + size = offset; + +out_set_count: + *count = size >> pgsize_idx; + return pgsize; +} + Can we keep this in iommu driver? Seems useful to other drivers too. Perhaps implement an sg friendly version of iopgtble ops, like unmap_sg() maybe! -Akhil. static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova, size_t size) { struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); struct io_pgtable_ops *ops = pagetable->pgtbl_ops; - size_t unmapped = 0; - /* Unmap the block one page at a time */ while (size) { - unmapped += ops->unmap(ops, iova, 4096, NULL); - iova += 4096; - size -= 4096; + size_t unmapped, pgsize, count; + + pgsize = calc_pgsize(pagetable, iova, iova, size, &count); + + unmapped = ops->unmap_pages(ops, iova, pgsize, count, NULL); + if (!unmapped) + break; + + iova += unmapped; + size -= unmapped; } iommu_flush_iotlb_all(to_msm_iommu(pagetable->parent)->domain); - return (unmapped == size) ? 0 : -EINVAL; + return (size == 0) ? 
0 : -EINVAL; } static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, @@ -54,7 +116,6 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); struct io_pgtable_ops *ops = pagetable->pgtbl_ops; struct scatterlist *sg; - size_t mapped = 0; u64 addr = iova; unsigned int i; @@ -62,17 +123,26 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, size_t size = sg->length; phys_addr_t phys = sg_phys(sg); - /* Map the block one page at a time */ while (size) { - if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) { -