On 05/05/2026 14:25, Sumit Garg wrote:
> On Mon, May 04, 2026 at 08:57:33PM +0200, Casey Connolly wrote:
>> The memory carveout logic was fairly limited and had a few issues,
>> rework it and teach it not to unmap regions that have a compatible
>> property (since they may be used in U-Boot) or that don't have the
>> no-map property.
>>
>> The carveout process adds ~100ms to the boot time depending on the
>> platform.
>>
>> This prepares us for using SMEM as a source of truth and improving
>> support for U-Boot as a first-stage bootloader, since SMEM's memory map
>> doesn't already carve out some regions like ABL does.
>>
>> Signed-off-by: Casey Connolly <[email protected]>
>> ---
>>  arch/arm/mach-snapdragon/board.c | 86 
>> +++++++++++++++++++++++++---------------
>>  1 file changed, 53 insertions(+), 33 deletions(-)
>>
>> diff --git a/arch/arm/mach-snapdragon/board.c 
>> b/arch/arm/mach-snapdragon/board.c
>> index 829a0109ac78..e12d3d00caa4 100644
>> --- a/arch/arm/mach-snapdragon/board.c
>> +++ b/arch/arm/mach-snapdragon/board.c
>> @@ -622,27 +622,36 @@ u64 get_page_table_size(void)
>>  {
>>      return SZ_1M;
>>  }
>>  
>> +struct mem_resource_attrs {
>> +    fdt_addr_t start;
>> +    fdt_addr_t size;
>> +    u64 attrs;
>> +};
> 
> Let's move the struct declaration towards the top.

It's only used by this one function; it felt easier to keep things
readable this way. Probably we'll move this code to its own file at
some point.

> 
>> +
>>  static int fdt_cmp_res(const void *v1, const void *v2)
> 
> This API should now be renamed as mem_cmp_resources(), no?

yeah

> 
>>  {
>> -    const struct fdt_resource *res1 = v1, *res2 = v2;
>> +    const struct mem_resource_attrs *res1 = v1, *res2 = v2;
>>  
>>      return res1->start - res2->start;
>>  }
>>  
>> -#define N_RESERVED_REGIONS 32
>> +#define N_RESERVED_REGIONS 64
>>  
>> -/* Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
>> +/* Map and unmap reserved memory regions as appropriate.
>> + * Mark all no-map regions as PTE_TYPE_FAULT to prevent speculative access.
>>   * On some platforms this is enough to trigger a security violation and trap
>>   * to EL3.
>> + * Regions that may be accessed by drivers get mapped explicitly.
>>   */
>> -static void carve_out_reserved_memory(void)
>> +static void configure_reserved_memory(void)
>>  {
>> -    static struct fdt_resource res[N_RESERVED_REGIONS] = { 0 };
>> +    static struct mem_resource_attrs res[N_RESERVED_REGIONS] = { 0 };
>>      int parent, rmem, count, i = 0;
>>      phys_addr_t start;
>>      size_t size;
>> +    u64 attrs;
>>  
>>      /* Some reserved nodes must be carved out, as the cache-prefetcher may 
>> otherwise
>>       * attempt to access them, causing a security exception.
>>       */
>> @@ -651,14 +660,19 @@ static void carve_out_reserved_memory(void)
>>              log_err("No reserved memory regions found\n");
>>              return;
>>      }
>>  
>> -    /* Collect the reserved memory regions */
>> +    /* Collect the reserved memory regions and appropriate attrs */
>>      fdt_for_each_subnode(rmem, gd->fdt_blob, parent) {
>>              const fdt32_t *ptr;
>> -            int len;
>> +            attrs = PTE_TYPE_FAULT;
>> +            /* If the no-map property isn't set then the region is valid */
>>              if (!fdt_getprop(gd->fdt_blob, rmem, "no-map", NULL))
>> -                    continue;
>> +                    attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
>> +            /* If the compatible property is set then this region may be 
>> accessed by drivers and should
>> +             * be marked valid too. */
>> +            if (fdt_getprop(gd->fdt_blob, rmem, "compatible", NULL))
>> +                    attrs = PTE_TYPE_VALID | PTE_BLOCK_MEMTYPE(MT_NORMAL);
>>  
>>              if (i == N_RESERVED_REGIONS) {
>>                      log_err("Too many reserved regions!\n");
>>                      break;
>> @@ -667,50 +681,55 @@ static void carve_out_reserved_memory(void)
>>              /* Read the address and size out from the reg property. Doing 
>> this "properly" with
>>               * fdt_get_resource() takes ~70ms on SDM845, but open-coding 
>> the happy path here
>>               * takes <1ms... Oh the woes of no dcache.
>>               */
>> -            ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", &len);
>> +            ptr = fdt_getprop(gd->fdt_blob, rmem, "reg", NULL);
>>              if (ptr) {
>>                      /* Qualcomm devices use #address/size-cells = <2> but 
>> all reserved regions are within
>>                       * the 32-bit address space. So we can cheat here for 
>> speed.
>>                       */
>>                      res[i].start = fdt32_to_cpu(ptr[1]);
>> -                    res[i].end = res[i].start + fdt32_to_cpu(ptr[3]);
>> +                    res[i].size = fdt32_to_cpu(ptr[3]);
>> +                    res[i].attrs = attrs;
>>                      i++;
>>              }
>>      }
>>  
>>      /* Sort the reserved memory regions by address */
>>      count = i;
>> -    qsort(res, count, sizeof(struct fdt_resource), fdt_cmp_res);
>> +    qsort(res, count, sizeof(res[0]), fdt_cmp_res);
>> +    debug("Mapping %d regions!\n", count);
>>  
>>      /* Now set the right attributes for them. Often a lot of the regions 
>> are tightly packed together
>> -     * so we can optimise the number of calls to mmu_change_region_attr() 
>> by combining adjacent
>> +     * so we can optimise the number of calls to 
>> mmu_change_region_attr_nobreak() by combining adjacent
>>       * regions.
>>       */
>> -    start = ALIGN_DOWN(res[0].start, SZ_2M);
>> -    size = ALIGN(res[0].end - start, SZ_2M);
>> +    start = res[0].start;
>> +    size = res[0].size;
>> +    attrs = res[0].attrs;
>> +    /* For each region after the first one, either increase the `size` to 
>> eventually be mapped or
>> +     * map the region we have and start a new one, this allows us to reduce 
>> the number of calls to
>> +     * mmu_map_region(). The loop is therefore "lagging" behind by one 
>> iteration. */
>>      for (i = 1; i <= count; i++) {
>> -            /* We ideally want to 2M align everything for more efficient 
>> pagetables, but we must avoid
>> -             * overwriting reserved memory regions which shouldn't be 
>> mapped as FAULT (like those with
>> -             * compatible properties).
>> -             * If within 2M of the previous region, bump the size to 
>> include this region. Otherwise
>> -             * start a new region.
>> -             */
>> -            if (i == count || start + size < res[i].start - SZ_2M) {
>> -                    debug("  0x%016llx - 0x%016llx: reserved\n",
>> -                          start, start + size);
>> -                    mmu_change_region_attr(start, size, PTE_TYPE_FAULT);
>> -                    /* If this is the final region then quit here before we 
>> index
>> -                     * out of bounds...
>> -                     */
>> +            /* If i == count we are done, just map the last region. If the 
>> last region is
>> +             * too far away or the attrs don't match then map the 
>> meta-region we have and
>> +             * start a new one. */
>> +            if (i == count || start + size < res[i].start - SZ_8K || attrs 
>> != res[i].attrs) {
> 
> I suppose this SZ_8K instead of SZ_2M is intentional here? It works now
> due to page table optimization I guess?

I realised there was a possibility of incorrectly mapping regions with
2M alignment. Probably not with the logic as it is, but e.g. if you
don't explicitly map reserved regions that need to be mapped then they
can get unmapped here.

> 
> -Sumit
> 
>> +                    debug("  0x%016llx - 0x%016llx: %s\n",
>> +                            start, start + size, attrs == PTE_TYPE_FAULT ? 
>> "FAULT" : "VALID");
>> +                    /* No need to break-before-make since dcache is 
>> disabled */
>> +                    mmu_change_region_attr_nobreak(start, size, attrs);
>> +                    /* We have now mapped all the regions */
>>                      if (i == count)
>>                              break;
>> -                    start = ALIGN_DOWN(res[i].start, SZ_2M);
>> -                    size = ALIGN(res[i].end - start, SZ_2M);
>> +                    /* Start a new meta-region */
>> +                    start = res[i].start;
>> +                    size = res[i].size;
>> +                    attrs = res[i].attrs;
>>              } else {
>> -                    /* Bump size if this region is immediately after the 
>> previous one */
>> -                    size = ALIGN(res[i].end - start, SZ_2M);
>> +                    /* This region is next to (<8K) the previous one so 
>> combine them.
>> +                     * Accounting for any small (<8K) gap. */
>> +                    size = (res[i].start - start) + res[i].size;
>>              }
>>      }
>>  }
>>  
>> @@ -744,13 +763,14 @@ void enable_caches(void)
>>      gd->arch.tlb_emerg = gd->arch.tlb_addr;
>>      gd->arch.tlb_addr = tlb_addr;
>>      gd->arch.tlb_size = tlb_size;
>>  
>> -    /* We do the carveouts only for QCS404, for now. */
>> +    /* On some boards speculative access may trigger a NOC or XPU violation 
>> so explicitly mark reserved
>> +     * regions as inaccessible (PTE_TYPE_FAULT) */
>>      if (fdt_node_check_compatible(gd->fdt_blob, 0, "qcom,qcs404") == 0) {
>>              carveout_start = get_timer(0);
>>              /* Takes ~20-50ms on SDM845 */
>> -            carve_out_reserved_memory();
>> +            configure_reserved_memory();
>>              debug("carveout time: %lums\n", get_timer(carveout_start));
>>      }
>>      dcache_enable();
>>  }
>>
>> -- 
>> 2.53.0
>>

-- 
// Casey (she/her)

Reply via email to