From: Karol Wachowski <karol.wachow...@linux.intel.com>

Previously using dma_alloc_wc() API we created cache coherent
(mapped as write-back) mappings.

Because we disable MMU600 snooping it was required to do costly
page walk and cache flushes after each page table modification.

With write-combined buffers it's possible to do a single write memory
barrier to flush write-combined buffer to memory which simplifies the
driver and significantly reduce time of map/unmap operations.

Mapping time of 255 MB is reduced from 2.5 ms to 500 us.

Signed-off-by: Karol Wachowski <karol.wachow...@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.grus...@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.grus...@linux.intel.com>
---
 drivers/accel/ivpu/ivpu_mmu_context.c | 115 ++++++++++++++------------
 1 file changed, 63 insertions(+), 52 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c 
b/drivers/accel/ivpu/ivpu_mmu_context.c
index 0c8c65351919..7d8005bc78d2 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -5,6 +5,9 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
@@ -38,12 +41,57 @@
 #define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | 
IVPU_MMU_ENTRY_FLAG_USER | \
                                IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
 
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+       dma_addr_t dma_addr;
+       struct page *page;
+       void *cpu;
+
+       page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+       if (!page)
+               return NULL;
+
+       set_pages_array_wc(&page, 1);
+
+       dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, 
DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(vdev->drm.dev, dma_addr))
+               goto err_free_page;
+
+       cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+       if (!cpu)
+               goto err_dma_unmap_page;
+
+
+       *dma = dma_addr;
+       return cpu;
+
+err_dma_unmap_page:
+       dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+       put_page(page);
+       return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, 
dma_addr_t dma_addr)
+{
+       struct page *page;
+
+       if (cpu_addr) {
+               page = vmalloc_to_page(cpu_addr);
+               vunmap(cpu_addr);
+               dma_unmap_page(vdev->drm.dev, dma_addr & 
~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+                              DMA_BIDIRECTIONAL);
+               set_pages_array_wb(&page, 1);
+               put_page(page);
+       }
+}
+
 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable)
 {
        dma_addr_t pgd_dma;
 
-       pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, 
IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
-                                                 GFP_KERNEL);
+       pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
        if (!pgtable->pgd_dma_ptr)
                return -ENOMEM;
 
@@ -52,13 +100,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, 
struct ivpu_mmu_pgtab
        return 0;
 }
 
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, 
dma_addr_t dma_addr)
-{
-       if (cpu_addr)
-               dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, 
cpu_addr,
-                                 dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
 static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable)
 {
        int pgd_idx, pud_idx, pmd_idx;
@@ -83,19 +124,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device 
*vdev, struct ivpu_mmu_pgt
                                pte_dma_ptr = 
pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
                                pte_dma = 
pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
 
-                               ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, 
pte_dma);
+                               ivpu_pgtable_free_page(vdev, pte_dma_ptr, 
pte_dma);
                        }
 
                        kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
-                       ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+                       ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
                }
 
                kfree(pgtable->pmd_ptrs[pgd_idx]);
                kfree(pgtable->pte_ptrs[pgd_idx]);
-               ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+               ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
        }
 
-       ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+       ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
 }
 
 static u64*
@@ -107,7 +148,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable,
        if (pud_dma_ptr)
                return pud_dma_ptr;
 
-       pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, 
&pud_dma, GFP_KERNEL);
+       pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
        if (!pud_dma_ptr)
                return NULL;
 
@@ -130,7 +171,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable,
        kfree(pgtable->pmd_ptrs[pgd_idx]);
 
 err_free_pud_dma_ptr:
-       ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+       ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
        return NULL;
 }
 
@@ -144,7 +185,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable,
        if (pmd_dma_ptr)
                return pmd_dma_ptr;
 
-       pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, 
&pmd_dma, GFP_KERNEL);
+       pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
        if (!pmd_dma_ptr)
                return NULL;
 
@@ -159,7 +200,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable,
        return pmd_dma_ptr;
 
 err_free_pmd_dma_ptr:
-       ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+       ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
        return NULL;
 }
 
@@ -173,7 +214,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct 
ivpu_mmu_pgtable *pgtable,
        if (pte_dma_ptr)
                return pte_dma_ptr;
 
-       pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, 
&pte_dma, GFP_KERNEL);
+       pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
        if (!pte_dma_ptr)
                return NULL;
 
@@ -248,38 +289,6 @@ static void ivpu_mmu_context_unmap_page(struct 
ivpu_mmu_context *ctx, u64 vpu_ad
        ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = 
IVPU_MMU_ENTRY_INVALID;
 }
 
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, 
size_t size)
-{
-       struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
-       u64 end_addr = vpu_addr + size;
-
-       /* Align to PMD entry (2 MB) */
-       vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
-       while (vpu_addr < end_addr) {
-               int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-               u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
-               while (vpu_addr < end_addr && vpu_addr < pud_end) {
-                       int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, 
vpu_addr);
-                       u64 pmd_end = (pud_idx + 1) * 
(u64)IVPU_MMU_PMD_MAP_SIZE;
-
-                       while (vpu_addr < end_addr && vpu_addr < pmd_end) {
-                               int pmd_idx = 
FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
-                               
clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
-                                                   IVPU_MMU_PGTABLE_SIZE);
-                               vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
-                       }
-                       clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
-                                           IVPU_MMU_PGTABLE_SIZE);
-               }
-               clflush_cache_range(pgtable->pud_ptrs[pgd_idx], 
IVPU_MMU_PGTABLE_SIZE);
-       }
-       clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
 static int
 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context 
*ctx,
                           u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 
prot)
@@ -352,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct 
ivpu_mmu_context *ctx,
                        mutex_unlock(&ctx->lock);
                        return ret;
                }
-               ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
                vpu_addr += size;
        }
 
+       /* Ensure page table modifications are flushed from wc buffers to 
memory */
+       wmb();
        mutex_unlock(&ctx->lock);
 
        ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -381,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, 
struct ivpu_mmu_context *ct
                size_t size = sg_dma_len(sg) + sg->offset;
 
                ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
-               ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
                vpu_addr += size;
        }
 
+       /* Ensure page table modifications are flushed from wc buffers to 
memory */
+       wmb();
        mutex_unlock(&ctx->lock);
 
        ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
-- 
2.25.1

Reply via email to