dcache writeback and icache invalidate are not required when the icache is coherent. A shorter fixed-length sequence can be used instead, which only has to flush and re-fetch instructions that were in flight.
Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- I haven't been able to measure a significant performance difference with this, qemu isn't flushing large ranges frequently so the old sequence is not that slow. include/qemu/cacheflush.h | 4 ++++ util/cacheflush.c | 9 +++++++++ util/cacheinfo.c | 16 ++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h index ae20bcda73..f65349ce3c 100644 --- a/include/qemu/cacheflush.h +++ b/include/qemu/cacheflush.h @@ -28,6 +28,10 @@ static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) #else +#if defined(__powerpc__) +extern bool have_coherent_icache; +#endif + void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len); #endif diff --git a/util/cacheflush.c b/util/cacheflush.c index 4b57186d89..15060f78b8 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -108,7 +108,16 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) size_t isize = qemu_icache_linesize; b = rw & ~(dsize - 1); + + if (have_coherent_icache) { + asm volatile ("sync" : : : "memory"); + asm volatile ("icbi 0,%0" : : "r"(b) : "memory"); + asm volatile ("isync" : : : "memory"); + return; + } + e = (rw + len + dsize - 1) & ~(dsize - 1); + for (p = b; p < e; p += dsize) { asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); } diff --git a/util/cacheinfo.c b/util/cacheinfo.c index ab1644d490..b632ff47ae 100644 --- a/util/cacheinfo.c +++ b/util/cacheinfo.c @@ -133,18 +133,30 @@ static void arch_cache_info(int *isize, int *dsize) } } -#elif defined(_ARCH_PPC) && defined(__linux__) -# include "elf.h" +#elif defined(__powerpc__) +bool have_coherent_icache = false; + +# if defined(_ARCH_PPC) && defined(__linux__) +# include "elf.h" static void arch_cache_info(int *isize, int *dsize) { +# ifdef PPC_FEATURE_ICACHE_SNOOP + unsigned long hwcap = qemu_getauxval(AT_HWCAP); +# endif + if (*isize == 0) { *isize = qemu_getauxval(AT_ICACHEBSIZE); } 
if (*dsize == 0) { *dsize = qemu_getauxval(AT_DCACHEBSIZE); } + +# ifdef PPC_FEATURE_ICACHE_SNOOP + have_coherent_icache = (hwcap & PPC_FEATURE_ICACHE_SNOOP) != 0; +# endif } +# endif #else static void arch_cache_info(int *isize, int *dsize) { } -- 2.35.1