Re: [PATCH] drm/msm/iommu: optimize map/unmap

2022-08-24 Thread Sai Prakash Ranjan

Hi Rob,

On 8/23/2022 12:17 AM, Rob Clark wrote:

From: Rob Clark 

Using map_pages/unmap_pages cuts down on the # of pgtable walks needed
in the process of finding where to insert/remove an entry.  The end
result is ~5-10x faster than mapping a single page at a time.

Signed-off-by: Rob Clark 
---
  drivers/gpu/drm/msm/msm_iommu.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
  1 file changed, 79 insertions(+), 12 deletions(-)
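
For context on where the speedup comes from: with the old ops->map() path every
4K page costs its own pagetable walk, while ops->map_pages() lets the io-pgtable
code install a whole run of equal-sized pages per walk. A minimal before/after
sketch (my illustration, not part of the patch; npages/iova/paddr are placeholder
variables):

	/* before: one call, and one pgtable walk, per 4K page */
	for (i = 0; i < npages; i++)
		ops->map(ops, iova + i * SZ_4K, paddr + i * SZ_4K,
			 SZ_4K, prot, GFP_KERNEL);

	/* after: one call covers the whole run of 4K pages */
	ops->map_pages(ops, iova, paddr, SZ_4K, npages,
		       prot, GFP_KERNEL, &mapped);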

diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index a54ed354578b..0f3f60da3314 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -21,6 +21,7 @@ struct msm_iommu_pagetable {
struct msm_mmu base;
struct msm_mmu *parent;
struct io_pgtable_ops *pgtbl_ops;
+   unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
phys_addr_t ttbr;
u32 asid;
  };
@@ -29,23 +30,85 @@ static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
return container_of(mmu, struct msm_iommu_pagetable, base);
  }
  
+/* based on iommu_pgsize() in iommu.c: */
+static size_t iommu_pgsize(struct msm_iommu_pagetable *pagetable,

Maybe call this msm_iommu_pgsize()? There won't be any namespace conflict
since it is static in both places, but renaming would still be better.

+  unsigned long iova, phys_addr_t paddr,
+  size_t size, size_t *count)
+{
+   unsigned int pgsize_idx, pgsize_idx_next;
+   unsigned long pgsizes;
+   size_t offset, pgsize, pgsize_next;
+   unsigned long addr_merge = paddr | iova;
+
+   /* Page sizes supported by the hardware and small enough for @size */
+   pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0);
+
+   /* Constrain the page sizes further based on the maximum alignment */
+   if (likely(addr_merge))
+   pgsizes &= GENMASK(__ffs(addr_merge), 0);
+
+   /* Make sure we have at least one suitable page size */
+   BUG_ON(!pgsizes);
+
+   /* Pick the biggest page size remaining */
+   pgsize_idx = __fls(pgsizes);
+   pgsize = BIT(pgsize_idx);
+   if (!count)
+   return pgsize;
+
+   /* Find the next biggest supported page size, if it exists */
+   pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+   if (!pgsizes)
+   goto out_set_count;
+
+   pgsize_idx_next = __ffs(pgsizes);
+   pgsize_next = BIT(pgsize_idx_next);
+
+   /*
+* There's no point trying a bigger page size unless the virtual
+* and physical addresses are similarly offset within the larger page.
+*/
+   if ((iova ^ paddr) & (pgsize_next - 1))
+   goto out_set_count;
+
+   /* Calculate the offset to the next page size alignment boundary */
+   offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+   /*
+* If size is big enough to accommodate the larger page, reduce
+* the number of smaller pages.
+*/
+   if (offset + pgsize_next <= size)
+   size = offset;
+
+out_set_count:
+   *count = size >> pgsize_idx;
+   return pgsize;
+}
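
To make the size selection concrete, a worked example of the function above,
assuming a pgsize_bitmap of SZ_4K | SZ_2M (the real bitmap depends on the
io-pgtable format; the values here are purely illustrative):

	size_t count, pgsize;

	/* 2M-aligned iova/paddr, 4M region: picks pgsize = SZ_2M, count = 2 */
	pgsize = iommu_pgsize(pagetable, 0x200000, 0x200000, SZ_4M, &count);

	/* 4K past a 2M boundary: falls back to pgsize = SZ_4K, and count
	 * (511) only covers the run up to the next 2M boundary, so the
	 * following iteration can switch to 2M pages.
	 */
	pgsize = iommu_pgsize(pagetable, 0x201000, 0x201000, SZ_4M, &count);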
+
  static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
size_t size)
  {
struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
-   size_t unmapped = 0;
  
	/* Unmap the block one page at a time */

This comment will need an update.

while (size) {
-   unmapped += ops->unmap(ops, iova, 4096, NULL);
-   iova += 4096;
-   size -= 4096;
+   size_t unmapped, pgsize, count;
+
+   pgsize = iommu_pgsize(pagetable, iova, iova, size, &count);
+
+   unmapped = ops->unmap_pages(ops, iova, pgsize, count, NULL);
+   if (!unmapped)
+   break;
+
+   iova += unmapped;
+   size -= unmapped;
}
  
  	iommu_flush_iotlb_all(to_msm_iommu(pagetable->parent)->domain);
  
-	return (unmapped == size) ? 0 : -EINVAL;
+   return (size == 0) ? 0 : -EINVAL;
  }
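
One detail the loop above relies on: unmap_pages() returns the number of bytes
it actually unmapped, so the loop advances by that amount and success reduces
to size reaching zero. A sample trace (again assuming a 4K | 2M bitmap, chosen
only for illustration):

	msm_iommu_pagetable_unmap(mmu, 0x200000, SZ_2M + SZ_4K);
	/* iteration 1: pgsize = SZ_2M, count = 1, unmapped = SZ_2M */
	/* iteration 2: pgsize = SZ_4K, count = 1, unmapped = SZ_4K,
	 * size hits 0, so the function returns 0
	 */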
  
static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
@@ -54,7 +117,6 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
struct scatterlist *sg;
-   size_t mapped = 0;
u64 addr = iova;
unsigned int i;
  
@@ -64,15 +126,19 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
  
		/* Map the block one page at a time */

This comment will need an update.

while (size) {
-   if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) {
-   msm_iommu_pagetable_unmap(mmu, iova, mapped);
+   size_t pgsize, count, mapped;
+
+   pgsize = iommu_pgsize(pagetable, addr, phys, size, &count);
+
+   if (ops->map_pages(ops, addr, phys, pgsize, count,
+  prot, GFP_KERNEL, &mapped)) {
+
