On Sat, Mar 23, 2019 at 12:47 PM Richard Henderson <richard.hender...@linaro.org> wrote:
>
> Move all softmmu tlb data into this structure. Arrange the
> members so that we are able to place mask+table together and
> at a smaller absolute offset from ENV.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
Acked-by: Alistair Francis <alistair.fran...@wdc.com> Alistair > --- > accel/tcg/softmmu_template.h | 4 +- > include/exec/cpu-defs.h | 61 ++++++++------- > include/exec/cpu_ldst.h | 6 +- > accel/tcg/cputlb.c | 147 ++++++++++++++++++----------------- > target/arm/translate-a64.c | 2 +- > tcg/aarch64/tcg-target.inc.c | 10 +-- > tcg/arm/tcg-target.inc.c | 10 +-- > tcg/i386/tcg-target.inc.c | 4 +- > tcg/mips/tcg-target.inc.c | 12 +-- > tcg/ppc/tcg-target.inc.c | 8 +- > tcg/riscv/tcg-target.inc.c | 12 +-- > tcg/s390/tcg-target.inc.c | 8 +- > tcg/sparc/tcg-target.inc.c | 12 +-- > 13 files changed, 135 insertions(+), 161 deletions(-) > > diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h > index e970a8b378..fc6371aed1 100644 > --- a/accel/tcg/softmmu_template.h > +++ b/accel/tcg/softmmu_template.h > @@ -102,7 +102,7 @@ static inline DATA_TYPE glue(io_read, > SUFFIX)(CPUArchState *env, > bool recheck, > MMUAccessType access_type) > { > - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck, > access_type, DATA_SIZE); > } > @@ -273,7 +273,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState > *env, > uintptr_t retaddr, > bool recheck) > { > - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, > recheck, DATA_SIZE); > } > diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h > index 2694481769..fbe8945606 100644 > --- a/include/exec/cpu-defs.h > +++ b/include/exec/cpu-defs.h > @@ -78,6 +78,7 @@ typedef uint64_t target_ulong; > #endif > > #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) > + > /* use a fully associative victim tlb of 8 entries */ > #define CPU_VTLB_SIZE 8 > > @@ -147,6 +148,10 @@ typedef struct CPUIOTLBEntry { > MemTxAttrs attrs; > } CPUIOTLBEntry; > > +/* > + * Data elements that are per MMU mode, minus the bits accessed by > + * the TCG fast path. > + */ > typedef struct CPUTLBDesc { > /* > * Describe a region covering all of the large pages allocated > @@ -160,16 +165,31 @@ typedef struct CPUTLBDesc { > int64_t window_begin_ns; > /* maximum number of entries observed in the window */ > size_t window_max_entries; > + size_t n_used_entries; > /* The next index to use in the tlb victim table. */ > size_t vindex; > - size_t n_used_entries; > + /* The tlb victim table, in two parts. */ > + CPUTLBEntry vtable[CPU_VTLB_SIZE]; > + CPUIOTLBEntry viotlb[CPU_VTLB_SIZE]; > + /* The iotlb. */ > + CPUIOTLBEntry *iotlb; > } CPUTLBDesc; > > +/* > + * Data elements that are per MMU mode, accessed by the fast path. > + */ > +typedef struct CPUTLBDescFast { > + /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ > + uintptr_t mask; > + /* The array of tlb entries itself. */ > + CPUTLBEntry *table; > +} CPUTLBDescFast; > + > /* > * Data elements that are shared between all MMU modes. > */ > typedef struct CPUTLBCommon { > - /* Serialize updates to tlb_table and tlb_v_table, and others as noted. > */ > + /* Serialize updates to tlb_table and vtable, and others as noted. 
*/ > QemuSpin lock; > /* > * Within dirty, for each bit N, modifications have been made to > @@ -187,35 +207,24 @@ typedef struct CPUTLBCommon { > size_t elide_flush_count; > } CPUTLBCommon; > > -# define CPU_TLB \ > - /* tlb_mask[i] contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ \ > - uintptr_t tlb_mask[NB_MMU_MODES]; \ > - CPUTLBEntry *tlb_table[NB_MMU_MODES]; > -# define CPU_IOTLB \ > - CPUIOTLBEntry *iotlb[NB_MMU_MODES]; > - > /* > + * The entire softmmu tlb, for all MMU modes. > * The meaning of each of the MMU modes is defined in the target code. > - * Note that NB_MMU_MODES is not yet defined; we can only reference it > - * within preprocessor defines that will be expanded later. > */ > -#define CPU_COMMON_TLB \ > - CPUTLBCommon tlb_c; \ > - CPUTLBDesc tlb_d[NB_MMU_MODES]; \ > - CPU_TLB \ > - CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ > - CPU_IOTLB \ > - CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; > +typedef struct CPUTLB { > + CPUTLBDescFast f[NB_MMU_MODES]; > + CPUTLBDesc d[NB_MMU_MODES]; > + CPUTLBCommon c; > +} CPUTLB; > + > +/* There are target-specific members named "tlb". This is temporary. */ > +#define CPU_COMMON CPUTLB tlb_; > +#define env_tlb(ENV) (&(ENV)->tlb_) > > #else > > -#define CPU_COMMON_TLB > - > -#endif > - > - > -#define CPU_COMMON \ > - /* soft mmu support */ \ > - CPU_COMMON_TLB \ > +#define CPU_COMMON /* Nothing */ > + > +#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */ > > #endif > diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h > index d78041d7a0..09abd95008 100644 > --- a/include/exec/cpu_ldst.h > +++ b/include/exec/cpu_ldst.h > @@ -139,21 +139,21 @@ static inline target_ulong tlb_addr_write(const > CPUTLBEntry *entry) > static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, > target_ulong addr) > { > - uintptr_t size_mask = env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS; > + uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> > CPU_TLB_ENTRY_BITS; > > return (addr >> TARGET_PAGE_BITS) & size_mask; > } > > static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) > { > - return (env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS) + 1; > + return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; > } > > /* Find the TLB entry corresponding to the mmu_idx + address pair. 
*/ > static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, > target_ulong addr) > { > - return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; > + return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; > } > > #ifdef MMU_MODE0_SUFFIX > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index 23586f9974..c28b6b6328 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -76,7 +76,7 @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); > > static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) > { > - return env->tlb_mask[mmu_idx] + (1 << CPU_TLB_ENTRY_BITS); > + return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); > } > > static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, > @@ -91,14 +91,14 @@ static void tlb_dyn_init(CPUArchState *env) > int i; > > for (i = 0; i < NB_MMU_MODES; i++) { > - CPUTLBDesc *desc = &env->tlb_d[i]; > + CPUTLBDesc *desc = &env_tlb(env)->d[i]; > size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; > > tlb_window_reset(desc, get_clock_realtime(), 0); > desc->n_used_entries = 0; > - env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS; > - env->tlb_table[i] = g_new(CPUTLBEntry, n_entries); > - env->iotlb[i] = g_new(CPUIOTLBEntry, n_entries); > + env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); > + env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); > } > } > > @@ -144,7 +144,7 @@ static void tlb_dyn_init(CPUArchState *env) > */ > static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) > { > - CPUTLBDesc *desc = &env->tlb_d[mmu_idx]; > + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; > size_t old_size = tlb_n_entries(env, mmu_idx); > size_t rate; > size_t new_size = old_size; > @@ -187,14 +187,14 @@ static void tlb_mmu_resize_locked(CPUArchState *env, > int mmu_idx) > return; > } > > - g_free(env->tlb_table[mmu_idx]); > - g_free(env->iotlb[mmu_idx]); > + g_free(env_tlb(env)->f[mmu_idx].table); > + g_free(env_tlb(env)->d[mmu_idx].iotlb); > > tlb_window_reset(desc, now, 0); > /* desc->n_used_entries is cleared by the caller */ > - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; > - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); > - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); > + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); > + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); > /* > * If the allocations fail, try smaller sizes. We just freed some > * memory, so going back to half of new_size has a good chance of > working. > @@ -202,46 +202,47 @@ static void tlb_mmu_resize_locked(CPUArchState *env, > int mmu_idx) > * allocations to fail though, so we progressively reduce the allocation > * size, aborting if we cannot even allocate the smallest TLB we support. 
> */ > - while (env->tlb_table[mmu_idx] == NULL || env->iotlb[mmu_idx] == NULL) { > + while (env_tlb(env)->f[mmu_idx].table == NULL || > + env_tlb(env)->d[mmu_idx].iotlb == NULL) { > if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { > error_report("%s: %s", __func__, strerror(errno)); > abort(); > } > new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); > - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; > > - g_free(env->tlb_table[mmu_idx]); > - g_free(env->iotlb[mmu_idx]); > - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); > - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); > + g_free(env_tlb(env)->f[mmu_idx].table); > + g_free(env_tlb(env)->d[mmu_idx].iotlb); > + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); > + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); > } > } > > static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) > { > tlb_mmu_resize_locked(env, mmu_idx); > - memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx)); > - env->tlb_d[mmu_idx].n_used_entries = 0; > + memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); > + env_tlb(env)->d[mmu_idx].n_used_entries = 0; > } > > static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t > mmu_idx) > { > - env->tlb_d[mmu_idx].n_used_entries++; > + env_tlb(env)->d[mmu_idx].n_used_entries++; > } > > static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t > mmu_idx) > { > - env->tlb_d[mmu_idx].n_used_entries--; > + env_tlb(env)->d[mmu_idx].n_used_entries--; > } > > void tlb_init(CPUState *cpu) > { > CPUArchState *env = cpu->env_ptr; > > - qemu_spin_init(&env->tlb_c.lock); > + qemu_spin_init(&env_tlb(env)->c.lock); > > /* Ensure that cpu_reset performs a full flush. 
*/ > - env->tlb_c.dirty = ALL_MMUIDX_BITS; > + env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; > > tlb_dyn_init(env); > } > @@ -273,9 +274,9 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, > size_t *pelide) > CPU_FOREACH(cpu) { > CPUArchState *env = cpu->env_ptr; > > - full += atomic_read(&env->tlb_c.full_flush_count); > - part += atomic_read(&env->tlb_c.part_flush_count); > - elide += atomic_read(&env->tlb_c.elide_flush_count); > + full += atomic_read(&env_tlb(env)->c.full_flush_count); > + part += atomic_read(&env_tlb(env)->c.part_flush_count); > + elide += atomic_read(&env_tlb(env)->c.elide_flush_count); > } > *pfull = full; > *ppart = part; > @@ -285,10 +286,11 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, > size_t *pelide) > static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) > { > tlb_table_flush_by_mmuidx(env, mmu_idx); > - memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); > - env->tlb_d[mmu_idx].large_page_addr = -1; > - env->tlb_d[mmu_idx].large_page_mask = -1; > - env->tlb_d[mmu_idx].vindex = 0; > + env_tlb(env)->d[mmu_idx].large_page_addr = -1; > + env_tlb(env)->d[mmu_idx].large_page_mask = -1; > + env_tlb(env)->d[mmu_idx].vindex = 0; > + memset(env_tlb(env)->d[mmu_idx].vtable, -1, > + sizeof(env_tlb(env)->d[0].vtable)); > } > > static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data > data) > @@ -301,31 +303,31 @@ static void tlb_flush_by_mmuidx_async_work(CPUState > *cpu, run_on_cpu_data data) > > tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > > - all_dirty = env->tlb_c.dirty; > + all_dirty = env_tlb(env)->c.dirty; > to_clean = asked & all_dirty; > all_dirty &= ~to_clean; > - env->tlb_c.dirty = all_dirty; > + env_tlb(env)->c.dirty = all_dirty; > > for (work = to_clean; work != 0; work &= work - 1) { > int mmu_idx = ctz32(work); > tlb_flush_one_mmuidx_locked(env, mmu_idx); > } > > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > cpu_tb_jmp_cache_clear(cpu); > > if (to_clean == ALL_MMUIDX_BITS) { > - atomic_set(&env->tlb_c.full_flush_count, > - env->tlb_c.full_flush_count + 1); > + atomic_set(&env_tlb(env)->c.full_flush_count, > + env_tlb(env)->c.full_flush_count + 1); > } else { > - atomic_set(&env->tlb_c.part_flush_count, > - env->tlb_c.part_flush_count + ctpop16(to_clean)); > + atomic_set(&env_tlb(env)->c.part_flush_count, > + env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); > if (to_clean != asked) { > - atomic_set(&env->tlb_c.elide_flush_count, > - env->tlb_c.elide_flush_count + > + atomic_set(&env_tlb(env)->c.elide_flush_count, > + env_tlb(env)->c.elide_flush_count + > ctpop16(asked & ~to_clean)); > } > } > @@ -410,11 +412,12 @@ static inline bool tlb_flush_entry_locked(CPUTLBEntry > *tlb_entry, > static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, > target_ulong page) > { > + CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; > int k; > > assert_cpu_is_self(ENV_GET_CPU(env)); > for (k = 0; k < CPU_VTLB_SIZE; k++) { > - if (tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page)) { > + if (tlb_flush_entry_locked(&d->vtable[k], page)) { > tlb_n_used_entries_dec(env, mmu_idx); > } > } > @@ -423,8 +426,8 @@ static inline void > tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, > static void tlb_flush_page_locked(CPUArchState *env, int midx, > target_ulong page) > { > - target_ulong lp_addr = env->tlb_d[midx].large_page_addr; > - target_ulong lp_mask = 
env->tlb_d[midx].large_page_mask; > + target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; > + target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; > > /* Check if we need to flush due to large pages. */ > if ((page & lp_mask) == lp_addr) { > @@ -459,13 +462,13 @@ static void > tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, > tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", > addr, mmu_idx_bitmap); > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > if (test_bit(mmu_idx, &mmu_idx_bitmap)) { > tlb_flush_page_locked(env, mmu_idx, addr); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > tb_flush_jmp_cache(cpu, addr); > } > @@ -609,22 +612,22 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, > ram_addr_t length) > int mmu_idx; > > env = cpu->env_ptr; > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > unsigned int i; > unsigned int n = tlb_n_entries(env, mmu_idx); > > for (i = 0; i < n; i++) { > - tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, > - length); > + tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], > + start1, length); > } > > for (i = 0; i < CPU_VTLB_SIZE; i++) { > - tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], > start1, > - length); > + tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], > + start1, length); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Called with tlb_c.lock held */ > @@ -646,7 +649,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > assert_cpu_is_self(cpu); > > vaddr &= TARGET_PAGE_MASK; > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); > } > @@ -654,10 +657,10 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > int k; > for (k = 0; k < CPU_VTLB_SIZE; k++) { > - tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); > + tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], > vaddr); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Our TLB does not support large pages, so remember the area covered by > @@ -665,7 +668,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > static void tlb_add_large_page(CPUArchState *env, int mmu_idx, > target_ulong vaddr, target_ulong size) > { > - target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr; > + target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; > target_ulong lp_mask = ~(size - 1); > > if (lp_addr == (target_ulong)-1) { > @@ -675,13 +678,13 @@ static void tlb_add_large_page(CPUArchState *env, int > mmu_idx, > /* Extend the existing region to include the new page. > This is a compromise between unnecessary flushes and > the cost of maintaining a full variable size TLB. 
*/ > - lp_mask &= env->tlb_d[mmu_idx].large_page_mask; > + lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; > while (((lp_addr ^ vaddr) & lp_mask) != 0) { > lp_mask <<= 1; > } > } > - env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask; > - env->tlb_d[mmu_idx].large_page_mask = lp_mask; > + env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; > + env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; > } > > /* Add a new TLB entry. At most one entry for a given virtual address > @@ -757,10 +760,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, > target_ulong vaddr, > * a longer critical section, but this is not a concern since the TLB > lock > * is unlikely to be contended. > */ > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > > /* Note that the tlb is no longer clean. */ > - env->tlb_c.dirty |= 1 << mmu_idx; > + env_tlb(env)->c.dirty |= 1 << mmu_idx; > > /* Make sure there's no cached translation for the new page. */ > tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); > @@ -770,12 +773,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, > target_ulong vaddr, > * different page; otherwise just overwrite the stale data. > */ > if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { > - unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE; > - CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; > + unsigned vidx = env_tlb(env)->d[mmu_idx].vindex++ % CPU_VTLB_SIZE; > + CPUTLBEntry *tv = &env_tlb(env)->d[mmu_idx].vtable[vidx]; > > /* Evict the old entry into the victim tlb. */ > copy_tlb_helper_locked(tv, te); > - env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; > + env_tlb(env)->d[mmu_idx].viotlb[vidx] = > env_tlb(env)->d[mmu_idx].iotlb[index]; > tlb_n_used_entries_dec(env, mmu_idx); > } > > @@ -792,8 +795,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong > vaddr, > * subtract here is that of the page base, and not the same as the > * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). > */ > - env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page; > - env->iotlb[mmu_idx][index].attrs = attrs; > + env_tlb(env)->d[mmu_idx].iotlb[index].addr = iotlb - vaddr_page; > + env_tlb(env)->d[mmu_idx].iotlb[index].attrs = attrs; > > /* Now calculate the new entry */ > tn.addend = addend - vaddr_page; > @@ -829,7 +832,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong > vaddr, > > copy_tlb_helper_locked(te, &tn); > tlb_n_used_entries_inc(env, mmu_idx); > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Add a new TLB entry, but without specifying the memory > @@ -996,7 +999,7 @@ static bool victim_tlb_hit(CPUArchState *env, size_t > mmu_idx, size_t index, > > assert_cpu_is_self(ENV_GET_CPU(env)); > for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { > - CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; > + CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; > target_ulong cmp; > > /* elt_ofs might correspond to .addr_write, so use atomic_read */ > @@ -1008,16 +1011,16 @@ static bool victim_tlb_hit(CPUArchState *env, size_t > mmu_idx, size_t index, > > if (cmp == page) { > /* Found entry in victim tlb, swap tlb and iotlb. 
*/ > - CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; > + CPUTLBEntry tmptlb, *tlb = > &env_tlb(env)->f[mmu_idx].table[index]; > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > copy_tlb_helper_locked(&tmptlb, tlb); > copy_tlb_helper_locked(tlb, vtlb); > copy_tlb_helper_locked(vtlb, &tmptlb); > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > - CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; > - CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; > + CPUIOTLBEntry tmpio, *io = > &env_tlb(env)->d[mmu_idx].iotlb[index]; > + CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; > tmpio = *io; *io = *vio; *vio = tmpio; > return true; > } > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c > index 1959046343..9bd23f5cae 100644 > --- a/target/arm/translate-a64.c > +++ b/target/arm/translate-a64.c > @@ -14163,7 +14163,7 @@ static bool is_guarded_page(CPUARMState *env, > DisasContext *s) > * table entry even for that case. > */ > return (tlb_hit(entry->addr_code, addr) && > - env->iotlb[mmu_idx][index].attrs.target_tlb_bit0); > + env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); > #endif > } > > diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c > index d57f9e500f..5e6af10faf 100644 > --- a/tcg/aarch64/tcg-target.inc.c > +++ b/tcg/aarch64/tcg-target.inc.c > @@ -1451,12 +1451,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool > is_ld, TCGMemOpIdx oi, > label->label_ptr[0] = label_ptr; > } > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* We expect to use a 24-bit unsigned offset from ENV. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0xffffff); > > /* Load and compare a TLB entry, emitting the conditional jump to the > @@ -1467,8 +1463,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg > addr_reg, TCGMemOp opc, > tcg_insn_unit **label_ptr, int mem_index, > bool is_read) > { > - int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_ofs = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_ofs = offsetof(CPUArchState, tlb_.f[mem_index].table); > unsigned a_bits = get_alignment_bits(opc); > unsigned s_bits = opc & MO_SIZE; > unsigned a_mask = (1u << a_bits) - 1; > diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c > index 2245a8aeb9..04c2eebb41 100644 > --- a/tcg/arm/tcg-target.inc.c > +++ b/tcg/arm/tcg-target.inc.c > @@ -1235,12 +1235,8 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg > argreg, > > #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* We expect to use a 20-bit unsigned offset from ENV. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0xfffff); > > /* Load and compare a TLB entry, leaving the flags set. Returns the register > @@ -1251,8 +1247,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg > addrlo, TCGReg addrhi, > { > int cmp_off = (is_load ? 
offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c > index e0670e5098..1bd33389c9 100644 > --- a/tcg/i386/tcg-target.inc.c > +++ b/tcg/i386/tcg-target.inc.c > @@ -1654,10 +1654,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, > TCGReg addrlo, TCGReg addrhi, > TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); > > tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0, > - offsetof(CPUArchState, tlb_mask[mem_index])); > + offsetof(CPUArchState, tlb_.f[mem_index].mask)); > > tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0, > - offsetof(CPUArchState, tlb_table[mem_index])); > + offsetof(CPUArchState, tlb_.f[mem_index].table)); > > /* If the required alignment is at least as large as the access, simply > copy the address and mask. For lesser alignments, check that we don't > diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c > index 8a92e916dd..b827579317 100644 > --- a/tcg/mips/tcg-target.inc.c > +++ b/tcg/mips/tcg-target.inc.c > @@ -1201,14 +1201,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int > i, TCGReg al, TCGReg ah) > return i; > } > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= 0x8000); > - > /* > * Perform the tlb comparison operation. > * The complete host address is placed in BASE. > @@ -1222,8 +1214,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg > base, TCGReg addrl, > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > int mem_index = get_mmuidx(oi); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > int add_off = offsetof(CPUTLBEntry, addend); > int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c > index 773690f1d9..1f717745c1 100644 > --- a/tcg/ppc/tcg-target.inc.c > +++ b/tcg/ppc/tcg-target.inc.c > @@ -1505,10 +1505,6 @@ static void * const qemu_st_helpers[16] = { > [MO_BEQ] = helper_be_stq_mmu, > }; > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* Perform the TLB load and compare. Places the result of the comparison > in CR7, loads the addend of the TLB into R3, and returns the register > containing the guest address (zero-extended into R4). Clobbers R0 and > R2. */ > @@ -1521,8 +1517,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp > opc, > = (is_read > ? 
offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c > index b785f4acb7..c1f9c784bc 100644 > --- a/tcg/riscv/tcg-target.inc.c > +++ b/tcg/riscv/tcg-target.inc.c > @@ -961,14 +961,6 @@ static void * const qemu_st_helpers[16] = { > /* We don't support oversize guests */ > QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS); > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= 0x800); > - > static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, > TCGReg addrh, TCGMemOpIdx oi, > tcg_insn_unit **label_ptr, bool is_load) > @@ -981,8 +973,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, > int mask_off, table_off; > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > > - mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > if (table_off > 0x7ff) { > int mask_hi = mask_off - sextreg(mask_off, 0, 12); > int table_hi = table_off - sextreg(table_off, 0, 12); > diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c > index 7db90b3bae..3a8794d9bd 100644 > --- a/tcg/s390/tcg-target.inc.c > +++ b/tcg/s390/tcg-target.inc.c > @@ -1538,9 +1538,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, > TCGMemOp opc, TCGReg data, > #include "tcg-ldst.inc.c" > > /* We're expecting to use a 20-bit signed offset on the tlb memory ops. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_mask[NB_MMU_MODES - 1]) > - > 0x7ffff); > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0x7ffff); > > /* Load and compare a TLB entry, leaving the flags set. Loads the TLB > @@ -1552,8 +1550,8 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg > addr_reg, TCGMemOp opc, > unsigned a_bits = get_alignment_bits(opc); > unsigned s_mask = (1 << s_bits) - 1; > unsigned a_mask = (1 << a_bits) - 1; > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > int ofs, a_off; > uint64_t tlb_mask; > > diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c > index 7a61839dc1..be10124e11 100644 > --- a/tcg/sparc/tcg-target.inc.c > +++ b/tcg/sparc/tcg-target.inc.c > @@ -1074,19 +1074,11 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int > count) > The result of the TLB comparison is in %[ix]cc. The sanitized address > is in the returned register, maybe %o0. The TLB addend is in %o1. */ > > -/* We expect tlb_mask to be before tlb_table. 
*/ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= (1 << 13)); > - > static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, > TCGMemOp opc, int which) > { > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg base = TCG_AREG0; > const TCGReg r0 = TCG_REG_O0; > const TCGReg r1 = TCG_REG_O1; > -- > 2.17.1 > >
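
As a rough illustration of what the new layout gives the fast path, here is a minimal, self-contained C sketch modelled on CPUTLBDescFast and the tlb_index()/tlb_entry() helpers from the patch. The type names, sizes and constants below are simplified stand-ins, not the real QEMU definitions.

/*
 * Standalone model of the CPUTLBDescFast fast path -- illustrative only.
 * Field names follow the patch; sizes and constants are made up here.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_BITS       12   /* stand-in for TARGET_PAGE_BITS */
#define TLB_ENTRY_BITS   4   /* log2(sizeof(TLBEntry)) in this model */

typedef struct {
    uint64_t addr_read;      /* page-aligned guest address, -1 when empty */
    uintptr_t addend;        /* guest-to-host offset for the hit case */
} TLBEntry;

typedef struct {
    uintptr_t mask;          /* (n_entries - 1) << TLB_ENTRY_BITS */
    TLBEntry *table;         /* dynamically sized array of entries */
} TLBDescFast;

/* Mirrors tlb_index()/tlb_entry(): both fields are read together on
 * every access, which is why the patch packs them side by side. */
static TLBEntry *lookup(const TLBDescFast *f, uint64_t addr)
{
    uintptr_t index = (addr >> PAGE_BITS) & (f->mask >> TLB_ENTRY_BITS);
    return &f->table[index];
}

int main(void)
{
    size_t n = 256;          /* arbitrary power-of-two table size */
    TLBDescFast f;

    f.mask = (n - 1) << TLB_ENTRY_BITS;
    f.table = malloc(n * sizeof(TLBEntry));
    memset(f.table, -1, n * sizeof(TLBEntry));   /* all entries "empty" */

    TLBEntry *e = lookup(&f, 0x40001234);
    printf("index %zu, addr_read 0x%" PRIx64 "\n",
           (size_t)(e - f.table), e->addr_read);

    free(f.table);
    return 0;
}

Keeping mask and table adjacent, and placing the f[] array first in CPUTLB, is what lets the patch retain each backend's QEMU_BUILD_BUG_ON check that offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) still fits the target's small immediate offset range.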