The memory carveout logic was fairly limited and had a few issues;
rework it and teach it not to unmap regions that have a compatible
property (since they may be used in U-Boot) or that lack the
no-map property.

The carveout process adds ~100ms to the boot time depending on the
platform.

This prepares us for using SMEM as a source of truth and improving
support for U-Boot as a first-stage bootloader, since SMEM's memory map
doesn't already carve out some regions like ABL does.

Signed-off-by: Casey Connolly <[email protected]>
---
 arch/arm/mach-snapdragon/board.c | 86 +++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 33 deletions(-)

diff --git a/arch/arm/mach-snapdragon/board.c b/arch/arm/mach-snapdragon/board.c
index 829a0109ac78..e12d3d00caa4 100644
--- a/arch/arm/mach-snapdragon/board.c
+++ b/arch/arm/mach-snapdragon/board.c
@@ -622,27 +622,36 @@ u64 get_page_table_size(void)
 {
        return SZ_1M;
 }
 
+struct mem_resource_attrs {
+       fdt_addr_t start;
+       fdt_addr_t size;
+       u64 attrs;
+};
+
 static int fdt_cmp_res(const void *v1, const void *v2)
 {
-       const struct fdt_resource *res1 = v1, *res2 = v2;
+       const struct mem_resource_attrs *res1 = v1, *res2 = v2;
 
        return res1->start - res2->start;
 }
 
-#define N_RESERVED_REGIONS 32
+#define N_RESERVED_REGIONS 64
 
-/* Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
+/* Map and unmap reserved memory regions as appropriate.
+ * Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
  * On some platforms this is enough to trigger a security violation and trap
  * to EL3.
+ * Regions that may be accessed by drivers get mapped explicitly.
  */
-static void carve_out_reserved_memory(void)
+static void configure_reserved_memory(void)
 {
-       static struct fdt_resource res[N_RESERVED_REGIONS] = { 0 };
+       static struct mem_resource_attrs res[N_RESERVED_REGIONS] = { 0 };
        int parent, rmem, count, i = 0;
        phys_addr_t start;
        size_t size;
+       u64 attrs;
 
        /* Some reserved nodes must be carved out, as the cache-prefetcher may 
otherwise
         * attempt to access them, causing a security exception.
         */
@@ -651,14 +660,19 @@ static void carve_out_reserved_memory(void)
                log_err("No reserved memory regions found\n");
                return;
        }
 
-       /* Collect the reserved memory regions */
+       /* Collect the reserved memory regions and appropriate attrs */
        fdt_for_each_subnode(rmem, gd->fdt_blob, parent) {
                const fdt32_t *ptr;
-               int len;
+               attrs = PTE_TYPE_FAULT;
+               /* If the no-map property isn't set then the region is valid */
                if (!fdt_getprop(gd->fdt_blob, rmem, "no-map", NULL))
-                       continue;
+                       attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
+               /* If the compatible property is set then this region may be 
accessed by drivers and should
+                * be marked valid too. */
+               if (fdt_getprop(gd->fdt_blob, rmem, "compatible", NULL))
+                       attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
 
                if (i == N_RESERVED_REGIONS) {
                        log_err("Too many reserved regions!\n");
                        break;
@@ -667,50 +681,55 @@ static void carve_out_reserved_memory(void)
                /* Read the address and size out from the reg property. Doing 
this "properly" with
                 * fdt_get_resource() takes ~70ms on SDM845, but open-coding 
the happy path here
                 * takes <1ms... Oh the woes of no dcache.
                 */
-               ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", &len);
+               ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", NULL);
                if (ptr) {
                        /* Qualcomm devices use #address/size-cells = <2> but 
all reserved regions are within
                         * the 32-bit address space. So we can cheat here for 
speed.
                         */
                        res[i].start = fdt32_to_cpu(ptr[1]);
-                       res[i].end = res[i].start + fdt32_to_cpu(ptr[3]);
+                       res[i].size = fdt32_to_cpu(ptr[3]);
+                       res[i].attrs = attrs;
                        i++;
                }
        }
 
        /* Sort the reserved memory regions by address */
        count = i;
-       qsort(res, count, sizeof(struct fdt_resource), fdt_cmp_res);
+       qsort(res, count, sizeof(res[0]), fdt_cmp_res);
+       debug("Mapping %d regions!\n", count);
 
        /* Now set the right attributes for them. Often a lot of the regions 
are tightly packed together
-        * so we can optimise the number of calls to mmu_change_region_attr() 
by combining adjacent
+        * so we can optimise the number of calls to 
mmu_change_region_attr_nobreak() by combining adjacent
         * regions.
         */
-       start = ALIGN_DOWN(res[0].start, SZ_2M);
-       size = ALIGN(res[0].end - start, SZ_2M);
+       start = res[0].start;
+       size = res[0].size;
+       attrs = res[0].attrs;
+       /* For each region after the first one, either increase the `size` to 
eventually be mapped or
+        * map the region we have and start a new one. This allows us to reduce 
the number of calls to
+        * mmu_change_region_attr_nobreak(). The loop is therefore "lagging" 
behind by one iteration. */
        for (i = 1; i <= count; i++) {
-               /* We ideally want to 2M align everything for more efficient 
pagetables, but we must avoid
-                * overwriting reserved memory regions which shouldn't be 
mapped as FAULT (like those with
-                * compatible properties).
-                * If within 2M of the previous region, bump the size to 
include this region. Otherwise
-                * start a new region.
-                */
-               if (i == count || start + size < res[i].start - SZ_2M) {
-                       debug("  0x%016llx - 0x%016llx: reserved\n",
-                             start, start + size);
-                       mmu_change_region_attr(start, size, PTE_TYPE_FAULT);
-                       /* If this is the final region then quit here before we 
index
-                        * out of bounds...
-                        */
+               /* If i == count we are done, just map the last region. If the 
last region is
+                * too far away or the attrs don't match then map the 
meta-region we have and
+                * start a new one. */
+               if (i == count || start + size < res[i].start - SZ_8K || attrs 
!= res[i].attrs) {
+                       debug("  0x%016llx - 0x%016llx: %s\n",
+                               start, start + size, attrs == PTE_TYPE_FAULT ? 
"FAULT" : "VALID");
+                       /* No need to break-before-make since dcache is 
disabled */
+                       mmu_change_region_attr_nobreak(start, size, attrs);
+                       /* We have now mapped all the regions */
                        if (i == count)
                                break;
-                       start = ALIGN_DOWN(res[i].start, SZ_2M);
-                       size = ALIGN(res[i].end - start, SZ_2M);
+                       /* Start a new meta-region */
+                       start = res[i].start;
+                       size = res[i].size;
+                       attrs = res[i].attrs;
                } else {
-                       /* Bump size if this region is immediately after the 
previous one */
-                       size = ALIGN(res[i].end - start, SZ_2M);
+                       /* This region is next to (<8K) the previous one so 
combine them.
+                        * Accounting for any small (<8K) gap. */
+                       size = (res[i].start - start) + res[i].size;
                }
        }
 }
 
@@ -744,13 +763,14 @@ void enable_caches(void)
        gd->arch.tlb_emerg = gd->arch.tlb_addr;
        gd->arch.tlb_addr = tlb_addr;
        gd->arch.tlb_size = tlb_size;
 
-       /* We do the carveouts only for QCS404, for now. */
+       /* On some boards speculative access may trigger a NOC or XPU violation 
so explicitly mark reserved
+        * regions as inaccessible (PTE_TYPE_FAULT) */
        if (fdt_node_check_compatible(gd->fdt_blob, 0, "qcom,qcs404") == 0) {
                carveout_start = get_timer(0);
                /* Takes ~20-50ms on SDM845 */
-               carve_out_reserved_memory();
+               configure_reserved_memory();
                debug("carveout time: %lums\n", get_timer(carveout_start));
        }
        dcache_enable();
 }

-- 
2.53.0

Reply via email to