This still needs to be split up and cleaned up, but I include it anyway
to show what remains to be done, here for arm32 as well as for arm64.
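
To give an idea of the direction: once the MMU is enabled, rewriting a
live translation table entry can leave stale entries in the TLBs, so
changed entries now go through an invalid (faulting) state first. A
rough per-entry sketch of what set_pte_range() below does for whole
ranges (ptep being an illustrative pointer to the descriptor):

        set_pte(ptep, PTE_TYPE_FAULT);          /* break */
        dma_flush_range(ptep, sizeof(*ptep));
        tlb_invalidate();
        set_pte(ptep, phys | flags);            /* make */
        dma_flush_range(ptep, sizeof(*ptep));

The break step is skipped for unchanged entries and while the MMU is
still off. In addition, arch_remap_range() now walks the page tables to
flush only the actually cacheable pages of a region before it is
remapped non-cacheable (flush_cacheable_pages()).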

Signed-off-by: Ahmad Fatoum <a.fat...@pengutronix.de>
---
 arch/arm/cpu/mmu_32.c | 253 +++++++++++++++++++++++++++++++++---------
 1 file changed, 201 insertions(+), 52 deletions(-)

diff --git a/arch/arm/cpu/mmu_32.c b/arch/arm/cpu/mmu_32.c
index 9f50194c7c2b..d4ed298ac64f 100644
--- a/arch/arm/cpu/mmu_32.c
+++ b/arch/arm/cpu/mmu_32.c
@@ -70,6 +70,45 @@ static void set_pte(uint32_t *pt, uint32_t val)
        WRITE_ONCE(*pt, val);
 }
 
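+/*
+ * set_pte_range - write a consecutive range of PTEs
+ * @virt: pointer to the first page table entry to write
+ * @phys: physical address mapped by the first entry (ORed with @flags)
+ * @count: number of entries to write
+ * @flags: type and attribute bits to set in every entry
+ * @break_before_make: if true, entries that change are first invalidated
+ *                     and the TLB is flushed, so the walker never sees
+ *                     a mix of the old and the new mapping
+ */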
+static void set_pte_range(uint32_t *virt, phys_addr_t phys,
+                         size_t count, uint32_t flags,
+                         bool break_before_make)
+{
+       bool made = false;
+
+       if (!break_before_make)
+               goto write_attrs;
+
+       if ((flags & PTE_TYPE_MASK) == PTE_TYPE_FAULT)
+               phys = 0;
+
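+       /*
+        * Break: invalidate all entries that change, so no stale TLB
+        * entries can survive past the following tlb_invalidate()
+        */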
+       for (int i = 0; i < count; i++) {
+               if (READ_ONCE(virt[i]) == ((phys + i * PAGE_SIZE) | flags))
+                       continue;
+               set_pte(&virt[i], PTE_TYPE_FAULT);
+               made = true;
+       }
+
+       if (made) {
+               dma_flush_range(virt, count * sizeof(u32));
+               tlb_invalidate();
+       } else {
+               break_before_make = false;
+       }
+
+write_attrs:
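+       /* Make: now write the final attributes and flush them out */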
+       for (int i = 0; i < count; i++, phys += PAGE_SIZE)
+               set_pte(&virt[i], phys | flags);
+
+       dma_flush_range(virt, count * sizeof(u32));
+
+#if 0
+       pr_notice("%s(0x%08x+0x%zx -> 0x%08x, flags=0x%x%s)\n", __func__,
+                 (unsigned)virt, count, phys, flags,
+                 made ? " [BBM]" : break_before_make ? " [BBM, but unneeded]" : "");
+#endif
+}
+
 #ifdef __PBL__
 static uint32_t *alloc_pte(void)
 {
@@ -89,30 +128,47 @@ static uint32_t *alloc_pte(void)
 }
 #endif
 
-static u32 *find_pte(unsigned long adr)
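+/*
+ * __find_pte - Find the descriptor that maps @adr in the given table
+ *
+ * Returns a pointer into the first level table with *level == 1 if @adr
+ * is covered by a section (or fault) entry, otherwise a pointer to the
+ * second level descriptor with *level == 2.
+ */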
+static u32 *__find_pte(uint32_t *ttb, unsigned long adr, int *level)
 {
+       u32 *pgd = (u32 *)&ttb[pgd_index(adr)];
        u32 *table;
-       uint32_t *ttb = get_ttb();
 
-       if (!pgd_type_table(ttb[pgd_index(adr)]))
-               return NULL;
+       if (!pgd_type_table(*pgd)) {
+               *level = 1;
+               return pgd;
+       }
+
+       *level = 2;
 
        /* find the coarse page table base address */
-       table = (u32 *)(ttb[pgd_index(adr)] & ~0x3ff);
+       table = (u32 *)(*pgd & ~0x3ff);
 
        /* find second level descriptor */
        return &table[(adr >> PAGE_SHIFT) & 0xff];
 }
 
+static u32 *find_pte(unsigned long adr)
+{
+       int level;
+       u32 *pte = __find_pte(get_ttb(), adr, &level);
+
+       return level == 2 ? pte : NULL;
+}
+
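+/* Like dma_flush_range(), but takes start and exclusive end instead of size */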
+static void dma_flush_range_end(unsigned long start, unsigned long end)
+{
+       __dma_flush_range(start, end);
+
+       if (outer_cache.flush_range)
+               outer_cache.flush_range(start, end);
+}
+
 void dma_flush_range(void *ptr, size_t size)
 {
        unsigned long start = (unsigned long)ptr;
        unsigned long end = start + size;
 
-       __dma_flush_range(start, end);
-
-       if (outer_cache.flush_range)
-               outer_cache.flush_range(start, end);
+       dma_flush_range_end(start, end);
 }
 
 void dma_inv_range(void *ptr, size_t size)
@@ -132,11 +188,11 @@ void dma_inv_range(void *ptr, size_t size)
  * Not yet exported, but may be later if someone finds use for it.
  */
 static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
-                          uint32_t flags)
+                          uint32_t flags, bool break_before_make)
 {
        uint32_t *ttb = get_ttb();
        u32 *table;
-       int i, ttb_idx;
+       int ttb_idx;
 
        virt = ALIGN_DOWN(virt, PGDIR_SIZE);
        phys = ALIGN_DOWN(phys, PGDIR_SIZE);
@@ -145,16 +201,11 @@ static u32 *arm_create_pte(unsigned long virt, unsigned long phys,
 
        ttb_idx = pgd_index(virt);
 
-       for (i = 0; i < PTRS_PER_PTE; i++) {
-               set_pte(&table[i], phys | PTE_TYPE_SMALL | flags);
-               virt += PAGE_SIZE;
-               phys += PAGE_SIZE;
-       }
-       dma_flush_range(table, PTRS_PER_PTE * sizeof(u32));
+       set_pte_range(table, phys, PTRS_PER_PTE, PTE_TYPE_SMALL | flags,
+                     break_before_make);
 
-       // TODO break-before-make missing
-       set_pte(&ttb[ttb_idx], (unsigned long)table | PMD_TYPE_TABLE);
-       dma_flush_range(&ttb[ttb_idx], sizeof(u32));
+       set_pte_range(&ttb[ttb_idx], (unsigned long)table, 1,
+                     PMD_TYPE_TABLE, break_before_make);
 
        return table;
 }
@@ -243,6 +294,22 @@ static uint32_t get_pte_flags(int map_type)
        }
 }
 
+static const char *map_type_tostr(int map_type)
+{
+       switch (map_type) {
+       case MAP_CACHED:
+               return "CACHED";
+       case MAP_UNCACHED:
+               return "UNCACHED";
+       case ARCH_MAP_WRITECOMBINE:
+               return "WRITECOMBINE";
+       case MAP_FAULT:
+               return "FAULT";
+       default:
+               return "<unknown>";
+       }
+}
+
 static uint32_t get_pmd_flags(int map_type)
 {
        return pte_flags_to_pmd(get_pte_flags(map_type));
@@ -250,6 +317,7 @@ static uint32_t get_pmd_flags(int map_type)
 
 static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t size, unsigned map_type)
 {
+       bool mmu_on;
        u32 virt_addr = (u32)_virt_addr;
        u32 pte_flags, pmd_flags;
        uint32_t *ttb = get_ttb();
@@ -262,6 +330,13 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
 
        size = PAGE_ALIGN(size);
 
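+       /*
+        * Entries can only be live in the TLBs once the MMU is enabled;
+        * only then does set_pte_range() have to break before make.
+        */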
+       mmu_on = get_cr() & CR_M;
+
+       pr_info("[MMU %s] remapping 0x%08x+0x%zx: phys 0x%08lx, type %s\n",
+                 mmu_on ? " ON" : "OFF",
+                 virt_addr, size, (ulong)phys_addr,
+                 map_type_tostr(map_type));
+
        while (size) {
                const bool pgdir_size_aligned = IS_ALIGNED(virt_addr, PGDIR_SIZE);
                u32 *pgd = (u32 *)&ttb[pgd_index(virt_addr)];
@@ -270,22 +345,20 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
                if (size >= PGDIR_SIZE && pgdir_size_aligned &&
                    IS_ALIGNED(phys_addr, PGDIR_SIZE) &&
                    !pgd_type_table(*pgd)) {
-                       u32 val;
+                       u32 flags;
                        /*
                         * TODO: Add code to discard a page table and
                         * replace it with a section
                         */
                        chunk = PGDIR_SIZE;
-                       val = phys_addr | pmd_flags;
+                       flags = pmd_flags;
                        if (map_type != MAP_FAULT)
-                               val |= PMD_TYPE_SECT;
-                       // TODO break-before-make missing
-                       set_pte(pgd, val);
-                       dma_flush_range(pgd, sizeof(*pgd));
+                               flags |= PMD_TYPE_SECT;
+                       set_pte_range(pgd, phys_addr, 1, flags, mmu_on);
                } else {
                        unsigned int num_ptes;
                        u32 *table = NULL;
-                       unsigned int i;
+                       u32 flags;
                        u32 *pte;
                        /*
                         * We only want to cover pages up until next
@@ -313,24 +386,16 @@ static void __arch_remap_range(void *_virt_addr, phys_addr_t phys_addr, size_t s
                                 * create a new page table for it
                                 */
                                table = arm_create_pte(virt_addr, phys_addr,
-                                                      pmd_flags_to_pte(*pgd));
+                                                      pmd_flags_to_pte(*pgd),
+                                                      mmu_on);
                                pte = find_pte(virt_addr);
                                BUG_ON(!pte);
                        }
 
-                       for (i = 0; i < num_ptes; i++) {
-                               u32 val;
+                       flags = pte_flags;
+                       if (map_type != MAP_FAULT)
+                               flags |= PTE_TYPE_SMALL;
 
-                               val = phys_addr + i * PAGE_SIZE;
-                               val |= pte_flags;
-                               if (map_type != MAP_FAULT)
-                                       val |= PTE_TYPE_SMALL;
-
-                               // TODO break-before-make missing
-                               set_pte(&pte[i], val);
-                       }
-
-                       dma_flush_range(pte, num_ptes * sizeof(u32));
+                       set_pte_range(pte, phys_addr, num_ptes, flags, mmu_on);
                }
 
                virt_addr += chunk;
@@ -345,12 +410,99 @@ static void early_remap_range(u32 addr, size_t size, unsigned map_type)
        __arch_remap_range((void *)addr, addr, size, map_type);
 }
 
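+/* Address range covered by a single descriptor at the given table level */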
+static size_t granule_size(int level)
+{
+       switch (level) {
+       default:
+       case 1:
+               return PGDIR_SIZE;
+       case 2:
+               return PAGE_SIZE;
+       }
+}
+
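+/* True if the descriptor at the given level has its cacheable bit set */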
+static bool pte_is_cacheable(uint32_t pte, int level)
+{
+       return  (level == 2 && (pte & PTE_CACHEABLE)) ||
+               (level == 1 && (pte & PMD_SECT_CACHEABLE));
+}
+
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size:  Size of the range in bytes.
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+static void flush_cacheable_pages(void *start, size_t size)
+{
+       u32 flush_start = ~0U, flush_end = ~0U;
+       u32 region_start, region_end;
+       size_t block_size;
+       u32 *ttb;
+
+       region_start = PAGE_ALIGN_DOWN((ulong)start);
+       region_end = PAGE_ALIGN((ulong)start + size);
+
+       ttb = get_ttb();
+
+       /*
+        * TODO: This loop could be made more optimal by inlining the page walk,
+        * so we need not restart address translation from the top every time.
+        *
+        * The hope is that with the page tables being cached and the
+        * windows being remapped being small, the overhead compared to
+        * actually flushing the ranges isn't too significant.
+        */
+       for (u32 addr = region_start; addr < region_end; addr += block_size) {
+               int level;
+               u32 *pte = __find_pte(ttb, addr, &level);
+
+               block_size = granule_size(level);
+
+               if (!pte || !pte_is_cacheable(*pte, level))
+                       continue;
+
+               if (flush_end == addr) {
+                       /*
+                        * While it's safe to flush the whole block_size,
+                        * it's unnecessary time waste to go beyond region_end.
+                        */
+                       flush_end = min(flush_end + block_size, region_end);
+                       continue;
+               }
+
+               /*
+                * We don't have a previous contiguous flush area to append to.
+                * If we recorded any area before, let's flush it now
+                */
+               if (flush_start != ~0U) {
+                       pr_notice("flushing %x-%x\n", flush_start, flush_end);
+                       dma_flush_range_end(flush_start, flush_end);
+               }
+
+               /* and start the new contiguous flush area with this page */
+               flush_start = addr;
+               flush_end = min(flush_start + block_size, region_end);
+       }
+
+       /* The previous loop won't flush the last cached range, so do it here */
+       if (flush_start != ~0U) {
+               pr_notice("flushing %x-%x\n", flush_start, flush_end);
+               dma_flush_range_end(flush_start, flush_end);
+       }
+}
+
 int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsigned map_type)
 {
-       __arch_remap_range(virt_addr, phys_addr, size, map_type);
 
-       if (map_type == MAP_UNCACHED)
-               dma_inv_range(virt_addr, size);
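+       /*
+        * Flush dirty cache lines while the range is still mapped
+        * cacheable, so neither dirty data nor stale cache lines remain
+        * once it's accessed with the new attributes.
+        */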
+       if (map_type != MAP_CACHED)
+               flush_cacheable_pages(virt_addr, size);
+
+       __arch_remap_range(virt_addr, phys_addr, size, map_type);
 
        return 0;
 }
@@ -377,13 +529,11 @@ static inline void create_flat_mapping(void)
 
 void *map_io_sections(unsigned long phys, void *_start, size_t size)
 {
-       unsigned long start = (unsigned long)_start, sec;
+       unsigned long start = (unsigned long)_start;
        uint32_t *ttb = get_ttb();
 
-       for (sec = start; sec < start + size; sec += PGDIR_SIZE, phys += PGDIR_SIZE) {
-               // TODO break-before-make missing
-               set_pte(&ttb[pgd_index(sec)], phys | get_pmd_flags(MAP_UNCACHED));
-       }
+       set_pte_range(&ttb[pgd_index(start)], phys, size / PGDIR_SIZE,
+                     get_pmd_flags(MAP_UNCACHED), true);
 
        dma_flush_range(ttb, 0x4000);
        tlb_invalidate();
@@ -424,11 +574,10 @@ static void create_vector_table(unsigned long adr)
                vectors = xmemalign(PAGE_SIZE, PAGE_SIZE);
                pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n",
                         vectors, adr);
-               arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED));
+               arm_create_pte(adr, adr, get_pte_flags(MAP_UNCACHED), true);
                pte = find_pte(adr);
-               // TODO break-before-make missing
-               set_pte(pte, (u32)vectors | PTE_TYPE_SMALL |
-                       get_pte_flags(MAP_CACHED));
+               set_pte_range(pte, (u32)vectors, 1, PTE_TYPE_SMALL |
+                             get_pte_flags(MAP_CACHED), true);
        }
 
        arm_fixup_vectors();
-- 
2.39.5

