In preparation for reusing the same code for ARM32, let's move flush_cacheable_pages() into a header. We intentionally don't move the code into mmu-common.c, as we want to give the compiler maximum leeway with inlining the page table walk.
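
For reference, the new header includes nothing itself: it only expects the
including mmu_*.c (together with the usual common headers for PAGE_ALIGN*,
min and ulong) to have the per-architecture helpers in scope before the
#include. The following is a rough sketch only: the signatures are inferred
from how the walk uses them, and the ARM32 definitions are not part of this
patch:

    /* sketch, not literal code: per-arch definitions the header relies on */
    typedef uint64_t mmu_addr_t;            /* presumably uint32_t on ARM32 */

    static mmu_addr_t *get_ttb(void);
    static mmu_addr_t *find_pte(mmu_addr_t *ttb, mmu_addr_t addr,
                                unsigned *level);
    static size_t granule_size(unsigned level);
    static bool pte_is_cacheable(mmu_addr_t pte);
    static inline void dma_flush_range_end(unsigned long start,
                                           unsigned long end);

    #include "flush_cacheable_pages.h"

Because these helpers stay static within each translation unit, the compiler
is free to specialize and inline the walk per architecture, which moving the
function into mmu-common.c would make harder.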

Signed-off-by: Ahmad Fatoum <a.fat...@pengutronix.de>
---
 arch/arm/cpu/flush_cacheable_pages.h | 67 ++++++++++++++++++++++++++++++++
 arch/arm/cpu/mmu_64.c                | 65 +-------------------------------
 2 files changed, 68 insertions(+), 64 deletions(-)
 create mode 100644 arch/arm/cpu/flush_cacheable_pages.h

diff --git a/arch/arm/cpu/flush_cacheable_pages.h b/arch/arm/cpu/flush_cacheable_pages.h
new file mode 100644
index 000000000000..85fde0122802
--- /dev/null
+++ b/arch/arm/cpu/flush_cacheable_pages.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: 2024 Ahmad Fatoum, Pengutronix */
+
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size: Size of range
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+static void flush_cacheable_pages(void *start, size_t size)
+{
+        mmu_addr_t flush_start = ~0UL, flush_end = ~0UL;
+        mmu_addr_t region_start, region_end;
+        size_t block_size;
+        mmu_addr_t *ttb;
+
+        region_start = PAGE_ALIGN_DOWN((ulong)start);
+        region_end = PAGE_ALIGN(region_start + size) - 1;
+
+        ttb = get_ttb();
+
+        /*
+         * TODO: This loop could be made more optimal by inlining the page walk,
+         * so we need not restart address translation from the top every time.
+         *
+         * The hope is that with the page tables being cached and the
+         * windows being remapped being small, the overhead compared to
+         * actually flushing the ranges isn't too significant.
+         */
+        for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
+                unsigned level;
+                mmu_addr_t *pte = find_pte(ttb, addr, &level);
+
+                block_size = granule_size(level);
+
+                if (!pte || !pte_is_cacheable(*pte))
+                        continue;
+
+                if (flush_end == addr) {
+                        /*
+                         * While it's safe to flush the whole block_size,
+                         * it's unnecessary time waste to go beyond region_end.
+                         */
+                        flush_end = min(flush_end + block_size, region_end);
+                        continue;
+                }
+
+                /*
+                 * We don't have a previous contiguous flush area to append to.
+                 * If we recorded any area before, let's flush it now
+                 */
+                if (flush_start != ~0UL)
+                        dma_flush_range_end(flush_start, flush_end);
+
+                /* and start the new contiguous flush area with this page */
+                flush_start = addr;
+                flush_end = min(flush_start + block_size, region_end);
+        }
+
+        /* The previous loop won't flush the last cached range, so do it here */
+        if (flush_start != ~0UL)
+                dma_flush_range_end(flush_start, flush_end);
+}
diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index a20cb39a9296..50bb25b5373a 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -273,70 +273,7 @@ static inline void dma_flush_range_end(unsigned long start, unsigned long end)
         v8_flush_dcache_range(start, end + 1);
 }
 
-/**
- * flush_cacheable_pages - Flush only the cacheable pages in a region
- * @start: Starting virtual address of the range.
- * @size: Size of range
- *
- * This function walks the page table and flushes the data caches for the
- * specified range only if the memory is marked as normal cacheable in the
- * page tables. If a non-cacheable or non-normal page is encountered,
- * it's skipped.
- */
-static void flush_cacheable_pages(void *start, size_t size)
-{
-        mmu_addr_t flush_start = ~0UL, flush_end = ~0UL;
-        mmu_addr_t region_start, region_end;
-        size_t block_size;
-        mmu_addr_t *ttb;
-
-        region_start = PAGE_ALIGN_DOWN((ulong)start);
-        region_end = PAGE_ALIGN(region_start + size) - 1;
-
-        ttb = get_ttb();
-
-        /*
-         * TODO: This loop could be made more optimal by inlining the page walk,
-         * so we need not restart address translation from the top every time.
-         *
-         * The hope is that with the page tables being cached and the
-         * windows being remapped being small, the overhead compared to
-         * actually flushing the ranges isn't too significant.
-         */
-        for (mmu_addr_t addr = region_start; addr < region_end; addr += block_size) {
-                unsigned level;
-                mmu_addr_t *pte = find_pte(ttb, addr, &level);
-
-                block_size = granule_size(level);
-
-                if (!pte || !pte_is_cacheable(*pte))
-                        continue;
-
-                if (flush_end == addr) {
-                        /*
-                         * While it's safe to flush the whole block_size,
-                         * it's unnecessary time waste to go beyond region_end.
-                         */
-                        flush_end = min(flush_end + block_size, region_end);
-                        continue;
-                }
-
-                /*
-                 * We don't have a previous contiguous flush area to append to.
-                 * If we recorded any area before, let's flush it now
-                 */
-                if (flush_start != ~0UL)
-                        dma_flush_range_end(flush_start, flush_end);
-
-                /* and start the new contiguous flush area with this page */
-                flush_start = addr;
-                flush_end = min(flush_start + block_size, region_end);
-        }
-
-        /* The previous loop won't flush the last cached range, so do it here */
-        if (flush_start != ~0UL)
-                dma_flush_range_end(flush_start, flush_end);
-}
+#include "flush_cacheable_pages.h"
 
 static void early_remap_range(uint64_t addr, size_t size, maptype_t map_type)
 {
-- 
2.39.5