s->code_ptr and s->code_buf are 4 byte pointers on aarch64 so the cache flush is off by a factor of 4
diff --git a/tcg/tcg.c b/tcg/tcg.c index 44b923f5fe..2c4b66965b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4325,7 +4325,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) /* flush instruction cache */ flush_idcache_range((uintptr_t)tcg_mirror_rw_to_rx(s->code_buf), - (uintptr_t)s->code_buf, s->code_ptr - s->code_buf); + (uintptr_t)s->code_buf, + (uintptr_t)s->code_ptr - (uintptr_t)s->code_buf); return tcg_current_code_size(s); } With this and the other changes, split JIT works fine on iOS. -j On Thu, Oct 29, 2020 at 5:49 PM Richard Henderson <richard.hender...@linaro.org> wrote: > > We are shortly going to have a split rw/rx jit buffer. Depending > on the host, we need to flush the dcache at the rw data pointer and > flush the icache at the rx code pointer. > > For now, the two passed pointers are identical, so there is no > effective change in behaviour. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > tcg/aarch64/tcg-target.h | 9 +++++++-- > tcg/arm/tcg-target.h | 8 ++++++-- > tcg/i386/tcg-target.h | 3 ++- > tcg/mips/tcg-target.h | 8 ++++++-- > tcg/ppc/tcg-target.h | 2 +- > tcg/riscv/tcg-target.h | 8 ++++++-- > tcg/s390/tcg-target.h | 3 ++- > tcg/sparc/tcg-target.h | 8 +++++--- > tcg/tci/tcg-target.h | 3 ++- > softmmu/physmem.c | 9 ++++++++- > tcg/tcg.c | 5 +++-- > tcg/aarch64/tcg-target.c.inc | 2 +- > tcg/mips/tcg-target.c.inc | 2 +- > tcg/ppc/tcg-target.c.inc | 21 +++++++++++---------- > tcg/sparc/tcg-target.c.inc | 4 ++-- > 15 files changed, 63 insertions(+), 32 deletions(-) > > diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h > index 663dd0b95e..d0a6a059b7 100644 > --- a/tcg/aarch64/tcg-target.h > +++ b/tcg/aarch64/tcg-target.h > @@ -148,9 +148,14 @@ typedef enum { > #define TCG_TARGET_DEFAULT_MO (0) > #define TCG_TARGET_HAS_MEMORY_BSWAP 1 > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > - __builtin___clear_cache((char *)start, (char *)stop); > + /* TODO: Copy this from gcc to avoid 4 loops instead of 2. */ > + if (rw != rx) { > + __builtin___clear_cache((char *)rw, (char *)(rw + len)); > + } > + __builtin___clear_cache((char *)rx, (char *)(rx + len)); > } > > void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); > diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h > index 17e771374d..fa88b24e43 100644 > --- a/tcg/arm/tcg-target.h > +++ b/tcg/arm/tcg-target.h > @@ -134,9 +134,13 @@ enum { > #define TCG_TARGET_DEFAULT_MO (0) > #define TCG_TARGET_HAS_MEMORY_BSWAP 1 > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > - __builtin___clear_cache((char *) start, (char *) stop); > + if (rw != rx) { > + __builtin___clear_cache((char *)rw, (char *)(rw + len)); > + } > + __builtin___clear_cache((char *)rx, (char *)(rx + len)); > } > > /* not defined -- call should be eliminated at compile time */ > diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h > index abd4ac7fc0..8323e72639 100644 > --- a/tcg/i386/tcg-target.h > +++ b/tcg/i386/tcg-target.h > @@ -206,7 +206,8 @@ extern bool have_avx2; > #define TCG_TARGET_extract_i64_valid(ofs, len) \ > (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > } > > diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h > index c6b091d849..47b1226ee9 100644 > --- a/tcg/mips/tcg-target.h > +++ b/tcg/mips/tcg-target.h > @@ -207,9 +207,13 @@ extern bool use_mips32r2_instructions; > #define TCG_TARGET_DEFAULT_MO (0) > #define TCG_TARGET_HAS_MEMORY_BSWAP 1 > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > - cacheflush ((void *)start, stop-start, ICACHE); > + if (rx != rw) { > + cacheflush((void *)rw, len, DCACHE); > + } > + cacheflush((void *)rx, len, ICACHE); > } > > void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); > diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h > index be10363956..fbb6dc1b47 100644 > --- a/tcg/ppc/tcg-target.h > +++ b/tcg/ppc/tcg-target.h > @@ -175,7 +175,7 @@ extern bool have_vsx; > #define TCG_TARGET_HAS_bitsel_vec have_vsx > #define TCG_TARGET_HAS_cmpsel_vec 0 > > -void flush_icache_range(uintptr_t start, uintptr_t stop); > +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len); > void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); > > #define TCG_TARGET_DEFAULT_MO (0) > diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h > index 032439d806..0fa6ae358e 100644 > --- a/tcg/riscv/tcg-target.h > +++ b/tcg/riscv/tcg-target.h > @@ -159,9 +159,13 @@ typedef enum { > #define TCG_TARGET_HAS_mulsh_i64 1 > #endif > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > - __builtin___clear_cache((char *)start, (char *)stop); > + if (rx != rw) { > + __builtin___clear_cache((char *)rw, (char *)(rw + len)); > + } > + __builtin___clear_cache((char *)rx, (char *)(rx + len)); > } > > /* not defined -- call should be eliminated at compile time */ > diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h > index 63c8797bd3..c3dc2e8938 100644 > --- a/tcg/s390/tcg-target.h > +++ b/tcg/s390/tcg-target.h > @@ -145,7 +145,8 @@ enum { > TCG_AREG0 = TCG_REG_R10, > }; > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > } > > diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h > index 633841ebf2..c27c40231e 100644 > --- a/tcg/sparc/tcg-target.h > +++ b/tcg/sparc/tcg-target.h > @@ -168,10 +168,12 @@ extern bool use_vis3_instructions; > #define TCG_TARGET_DEFAULT_MO (0) > #define TCG_TARGET_HAS_MEMORY_BSWAP 1 > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > - uintptr_t p; > - for (p = start & -8; p < ((stop + 7) & -8); p += 8) { > + /* No additional data flush to the RW virtual address required. */ > + uintptr_t p, end = (rx + len + 7) & -8; > + for (p = rx & -8; p < end; p += 8) { > __asm__ __volatile__("flush\t%0" : : "r" (p)); > } > } > diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h > index 8c1c1d265d..6460449719 100644 > --- a/tcg/tci/tcg-target.h > +++ b/tcg/tci/tcg-target.h > @@ -191,7 +191,8 @@ void tci_disas(uint8_t opc); > > #define HAVE_TCG_QEMU_TB_EXEC > > -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t > len) > { > } > > diff --git a/softmmu/physmem.c b/softmmu/physmem.c > index a9adedb9f8..b23c1fef54 100644 > --- a/softmmu/physmem.c > +++ b/softmmu/physmem.c > @@ -2954,7 +2954,14 @@ static inline MemTxResult > address_space_write_rom_internal(AddressSpace *as, > invalidate_and_set_dirty(mr, addr1, l); > break; > case FLUSH_CACHE: > - flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + > l); > + /* > + * FIXME: This function is currently located in tcg/host/, > + * but we never come here when tcg is enabled; only for > + * real hardware acceleration. This can actively fail > + * when TCI is configured, since that function is a nop. > + * We should move this to util/ or something. > + */ > + flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, > l); > break; > } > } > diff --git a/tcg/tcg.c b/tcg/tcg.c > index a8c28440e2..3bf36e0cfe 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -1076,7 +1076,7 @@ void tcg_prologue_init(TCGContext *s) > #endif > > buf1 = s->code_ptr; > - flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); > + flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0, buf1 - buf0); > > /* Deduct the prologue from the buffer. */ > prologue_size = tcg_current_code_size(s); > @@ -4268,7 +4268,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > } > > /* flush instruction cache */ > - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); > + flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf, > + s->code_ptr - s->code_buf); > > return tcg_current_code_size(s); > } > diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc > index 26f71cb599..83af3108a4 100644 > --- a/tcg/aarch64/tcg-target.c.inc > +++ b/tcg/aarch64/tcg-target.c.inc > @@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, > uintptr_t jmp_addr, > } > pair = (uint64_t)i2 << 32 | i1; > qatomic_set((uint64_t *)jmp_addr, pair); > - flush_icache_range(jmp_addr, jmp_addr + 8); > + flush_idcache_range(jmp_addr, jmp_addr, 8); > } > > static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) > diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc > index 41be574e89..c255ecb444 100644 > --- a/tcg/mips/tcg-target.c.inc > +++ b/tcg/mips/tcg-target.c.inc > @@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, > uintptr_t jmp_addr, > uintptr_t addr) > { > qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2)); > - flush_icache_range(jmp_addr, jmp_addr + 4); > + flush_idcache_range(jmp_addr, jmp_addr, 4); > } > > typedef struct { > diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc > index 18ee989f95..a848e98383 100644 > --- a/tcg/ppc/tcg-target.c.inc > +++ b/tcg/ppc/tcg-target.c.inc > @@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, > uintptr_t jmp_addr, > /* As per the enclosing if, this is ppc64. Avoid the _Static_assert > within qatomic_set that would fail to build a ppc32 host. */ > qatomic_set__nocheck((uint64_t *)jmp_addr, pair); > - flush_icache_range(jmp_addr, jmp_addr + 8); > + flush_idcache_range(jmp_addr, jmp_addr, 8); > } else { > intptr_t diff = addr - jmp_addr; > tcg_debug_assert(in_range_b(diff)); > qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc)); > - flush_icache_range(jmp_addr, jmp_addr + 4); > + flush_idcache_range(jmp_addr, jmp_addr, 4); > } > } > > @@ -3864,22 +3864,23 @@ void tcg_register_jit(void *buf, size_t buf_size) > } > #endif /* __ELF__ */ > > -void flush_icache_range(uintptr_t start, uintptr_t stop) > +/* Flush the dcache at RW, and the icache at RX, as necessary. */ > +void flush_idcache_range(uintptr_t rx, uintptr_t rw, uintptr_t len) > { > - uintptr_t p, start1, stop1; > + uintptr_t p, start, stop; > size_t dsize = qemu_dcache_linesize; > size_t isize = qemu_icache_linesize; > > - start1 = start & ~(dsize - 1); > - stop1 = (stop + dsize - 1) & ~(dsize - 1); > - for (p = start1; p < stop1; p += dsize) { > + start = rw & ~(dsize - 1); > + stop = (rw + len + dsize - 1) & ~(dsize - 1); > + for (p = start; p < stop; p += dsize) { > asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); > } > asm volatile ("sync" : : : "memory"); > > - start &= start & ~(isize - 1); > - stop1 = (stop + isize - 1) & ~(isize - 1); > - for (p = start1; p < stop1; p += isize) { > + start = rx & ~(isize - 1); > + stop = (rx + len + isize - 1) & ~(isize - 1); > + for (p = start; p < stop; p += isize) { > asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); > } > asm volatile ("sync" : : : "memory"); > diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc > index 6775bd30fc..6e2d755f6a 100644 > --- a/tcg/sparc/tcg-target.c.inc > +++ b/tcg/sparc/tcg-target.c.inc > @@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, > uintptr_t jmp_addr, > if (!USE_REG_TB) { > qatomic_set((uint32_t *)jmp_addr, > deposit32(CALL, 0, 30, br_disp >> 2)); > - flush_icache_range(jmp_addr, jmp_addr + 4); > + flush_idcache_range(jmp_addr, jmp_addr, 4); > return; > } > > @@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, > uintptr_t jmp_addr, > } > > qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1)); > - flush_icache_range(jmp_addr, jmp_addr + 8); > + flush_idcache_range(jmp_addr, jmp_addr, 8); > } > -- > 2.25.1 >