The memory carveout logic was fairly limited and had a few issues. Rework it and teach it not to unmap regions that have a compatible property (since they may be used in U-Boot) or that don't have the no-map property.
The carveout process adds ~100ms to the boot time depending on the platform. This prepares us for using SMEM as a source of truth and improving support for U-Boot as a first stage bootloader since SMEM's memory map doesn't already carve out some regions like ABL does. Signed-off-by: Casey Connolly <[email protected]> --- arch/arm/mach-snapdragon/board.c | 86 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/arch/arm/mach-snapdragon/board.c b/arch/arm/mach-snapdragon/board.c index 829a0109ac78..e12d3d00caa4 100644 --- a/arch/arm/mach-snapdragon/board.c +++ b/arch/arm/mach-snapdragon/board.c @@ -622,27 +622,36 @@ u64 get_page_table_size(void) { return SZ_1M; } +struct mem_resource_attrs { + fdt_addr_t start; + fdt_addr_t size; + u64 attrs; +}; + static int fdt_cmp_res(const void *v1, const void *v2) { - const struct fdt_resource *res1 = v1, *res2 = v2; + const struct mem_resource_attrs *res1 = v1, *res2 = v2; return res1->start - res2->start; } -#define N_RESERVED_REGIONS 32 +#define N_RESERVED_REGIONS 64 -/* Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access. +/* Map and unmap reserved memory regions as appropriate. + * Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access. * On some platforms this is enough to trigger a security violation and trap * to EL3. + * Regions that may be accessed by drivers get mapped explicitly. */ -static void carve_out_reserved_memory(void) +static void configure_reserved_memory(void) { - static struct fdt_resource res[N_RESERVED_REGIONS] = { 0 }; + static struct mem_resource_attrs res[N_RESERVED_REGIONS] = { 0 }; int parent, rmem, count, i = 0; phys_addr_t start; size_t size; + u64 attrs; /* Some reserved nodes must be carved out, as the cache-prefetcher may otherwise * attempt to access them, causing a security exception. 
*/ @@ -651,14 +660,19 @@ static void carve_out_reserved_memory(void) log_err("No reserved memory regions found\n"); return; } - /* Collect the reserved memory regions */ + /* Collect the reserved memory regions and appropriate attrs */ fdt_for_each_subnode(rmem, gd->fdt_blob, parent) { const fdt32_t *ptr; - int len; + attrs = PTE_TYPE_FAULT; + /* If the no-map property isn't set then the region is valid */ if (!fdt_getprop(gd->fdt_blob, rmem, "no-map", NULL)) - continue; + attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL); + /* If the compatible property is set then this region may be accessed by drivers and should + * be marked valid too. */ + if (fdt_getprop(gd->fdt_blob, rmem, "compatible", NULL)) + attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL); if (i == N_RESERVED_REGIONS) { log_err("Too many reserved regions!\n"); break; @@ -667,50 +681,55 @@ static void carve_out_reserved_memory(void) /* Read the address and size out from the reg property. Doing this "properly" with * fdt_get_resource() takes ~70ms on SDM845, but open-coding the happy path here * takes <1ms... Oh the woes of no dcache. */ - ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", &len); + ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", NULL); if (ptr) { /* Qualcomm devices use #address/size-cells = <2> but all reserved regions are within * the 32-bit address space. So we can cheat here for speed. */ res[i].start = fdt32_to_cpu(ptr[1]); - res[i].end = res[i].start + fdt32_to_cpu(ptr[3]); + res[i].size = fdt32_to_cpu(ptr[3]); + res[i].attrs = attrs; i++; } } /* Sort the reserved memory regions by address */ count = i; - qsort(res, count, sizeof(struct fdt_resource), fdt_cmp_res); + qsort(res, count, sizeof(res[0]), fdt_cmp_res); + debug("Mapping %d regions!\n", count); /* Now set the right attributes for them. 
Often a lot of the regions are tightly packed together - * so we can optimise the number of calls to mmu_change_region_attr() by combining adjacent + * so we can optimise the number of calls to mmu_change_region_attr_nobreak() by combining adjacent * regions. */ - start = ALIGN_DOWN(res[0].start, SZ_2M); - size = ALIGN(res[0].end - start, SZ_2M); + start = res[0].start; + size = res[0].size; + attrs = res[0].attrs; + /* For each region after the first one, either increase the `size` to eventually be mapped or + * map the region we have and start a new one; this allows us to reduce the number of calls to + * mmu_change_region_attr_nobreak(). The loop is therefore "lagging" behind by one iteration. */ for (i = 1; i <= count; i++) { - /* We ideally want to 2M align everything for more efficient pagetables, but we must avoid - * overwriting reserved memory regions which shouldn't be mapped as FAULT (like those with - * compatible properties). - * If within 2M of the previous region, bump the size to include this region. Otherwise - * start a new region. - */ - if (i == count || start + size < res[i].start - SZ_2M) { - debug(" 0x%016llx - 0x%016llx: reserved\n", - start, start + size); - mmu_change_region_attr(start, size, PTE_TYPE_FAULT); - /* If this is the final region then quit here before we index - * out of bounds... - */ + /* If i == count we are done; just map the last region. If the last region is + * too far away or the attrs don't match then map the meta-region we have and + * start a new one. */ + if (i == count || start + size < res[i].start - SZ_8K || attrs != res[i].attrs) { + debug(" 0x%016llx - 0x%016llx: %s\n", + start, start + size, attrs == PTE_TYPE_FAULT ? 
"FAULT" : "VALID"); + /* No need to break-before-make since dcache is disabled */ + mmu_change_region_attr_nobreak(start, size, attrs); + /* We have now mapped all the regions */ if (i == count) break; - start = ALIGN_DOWN(res[i].start, SZ_2M); - size = ALIGN(res[i].end - start, SZ_2M); + /* Start a new meta-region */ + start = res[i].start; + size = res[i].size; + attrs = res[i].attrs; } else { - /* Bump size if this region is immediately after the previous one */ - size = ALIGN(res[i].end - start, SZ_2M); + /* This region is next to (<8K) the previous one so combine them, + * accounting for any small (<8K) gap. */ + size = (res[i].start - start) + res[i].size; } } } @@ -744,13 +763,14 @@ void enable_caches(void) gd->arch.tlb_emerg = gd->arch.tlb_addr; gd->arch.tlb_addr = tlb_addr; gd->arch.tlb_size = tlb_size; - /* We do the carveouts only for QCS404, for now. */ + /* On some boards speculative access may trigger a NOC or XPU violation so explicitly mark reserved + * regions as inaccessible (PTE_TYPE_FAULT). */ if (fdt_node_check_compatible(gd->fdt_blob, 0, "qcom,qcs404") == 0) { carveout_start = get_timer(0); /* Takes ~20-50ms on SDM845 */ - carve_out_reserved_memory(); + configure_reserved_memory(); debug("carveout time: %lums\n", get_timer(carveout_start)); } dcache_enable(); } -- 2.53.0

