On Sat, Mar 23, 2019 at 12:47 PM Richard Henderson <richard.hender...@linaro.org> wrote:
>
> Move all softmmu tlb data into this structure. Arrange the
> members so that we are able to place mask+table together and
> at a smaller absolute offset from ENV.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
Acked-by: Alistair Francis <alistair.fran...@wdc.com> Alistair > --- > accel/tcg/softmmu_template.h | 4 +- > include/exec/cpu-defs.h | 61 ++++++++------- > include/exec/cpu_ldst.h | 6 +- > accel/tcg/cputlb.c | 147 ++++++++++++++++++----------------- > target/arm/translate-a64.c | 2 +- > tcg/aarch64/tcg-target.inc.c | 10 +-- > tcg/arm/tcg-target.inc.c | 10 +-- > tcg/i386/tcg-target.inc.c | 4 +- > tcg/mips/tcg-target.inc.c | 12 +-- > tcg/ppc/tcg-target.inc.c | 8 +- > tcg/riscv/tcg-target.inc.c | 12 +-- > tcg/s390/tcg-target.inc.c | 8 +- > tcg/sparc/tcg-target.inc.c | 12 +-- > 13 files changed, 135 insertions(+), 161 deletions(-) > > diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h > index e970a8b378..fc6371aed1 100644 > --- a/accel/tcg/softmmu_template.h > +++ b/accel/tcg/softmmu_template.h > @@ -102,7 +102,7 @@ static inline DATA_TYPE glue(io_read, > SUFFIX)(CPUArchState *env, > bool recheck, > MMUAccessType access_type) > { > - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck, > access_type, DATA_SIZE); > } > @@ -273,7 +273,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState > *env, > uintptr_t retaddr, > bool recheck) > { > - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, > recheck, DATA_SIZE); > } > diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h > index 2694481769..fbe8945606 100644 > --- a/include/exec/cpu-defs.h > +++ b/include/exec/cpu-defs.h > @@ -78,6 +78,7 @@ typedef uint64_t target_ulong; > #endif > > #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) > + > /* use a fully associative victim tlb of 8 entries */ > #define CPU_VTLB_SIZE 8 > > @@ -147,6 +148,10 @@ typedef struct CPUIOTLBEntry { > MemTxAttrs attrs; > } CPUIOTLBEntry; > > +/* > + * Data elements that are per MMU mode, minus the bits accessed by > + * the TCG fast path. > + */ > typedef struct CPUTLBDesc { > /* > * Describe a region covering all of the large pages allocated > @@ -160,16 +165,31 @@ typedef struct CPUTLBDesc { > int64_t window_begin_ns; > /* maximum number of entries observed in the window */ > size_t window_max_entries; > + size_t n_used_entries; > /* The next index to use in the tlb victim table. */ > size_t vindex; > - size_t n_used_entries; > + /* The tlb victim table, in two parts. */ > + CPUTLBEntry vtable[CPU_VTLB_SIZE]; > + CPUIOTLBEntry viotlb[CPU_VTLB_SIZE]; > + /* The iotlb. */ > + CPUIOTLBEntry *iotlb; > } CPUTLBDesc; > > +/* > + * Data elements that are per MMU mode, accessed by the fast path. > + */ > +typedef struct CPUTLBDescFast { > + /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ > + uintptr_t mask; > + /* The array of tlb entries itself. */ > + CPUTLBEntry *table; > +} CPUTLBDescFast; > + > /* > * Data elements that are shared between all MMU modes. > */ > typedef struct CPUTLBCommon { > - /* Serialize updates to tlb_table and tlb_v_table, and others as noted. > */ > + /* Serialize updates to tlb_table and vtable, and others as noted. 
*/ > QemuSpin lock; > /* > * Within dirty, for each bit N, modifications have been made to > @@ -187,35 +207,24 @@ typedef struct CPUTLBCommon { > size_t elide_flush_count; > } CPUTLBCommon; > > -# define CPU_TLB \ > - /* tlb_mask[i] contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ \ > - uintptr_t tlb_mask[NB_MMU_MODES]; \ > - CPUTLBEntry *tlb_table[NB_MMU_MODES]; > -# define CPU_IOTLB \ > - CPUIOTLBEntry *iotlb[NB_MMU_MODES]; > - > /* > + * The entire softmmu tlb, for all MMU modes. > * The meaning of each of the MMU modes is defined in the target code. > - * Note that NB_MMU_MODES is not yet defined; we can only reference it > - * within preprocessor defines that will be expanded later. > */ > -#define CPU_COMMON_TLB \ > - CPUTLBCommon tlb_c; \ > - CPUTLBDesc tlb_d[NB_MMU_MODES]; \ > - CPU_TLB \ > - CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ > - CPU_IOTLB \ > - CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; > +typedef struct CPUTLB { > + CPUTLBDescFast f[NB_MMU_MODES]; > + CPUTLBDesc d[NB_MMU_MODES]; > + CPUTLBCommon c; > +} CPUTLB; > + > +/* There are target-specific members named "tlb". This is temporary. */ > +#define CPU_COMMON CPUTLB tlb_; > +#define env_tlb(ENV) (&(ENV)->tlb_) > > #else > > -#define CPU_COMMON_TLB > - > -#endif > - > - > -#define CPU_COMMON \ > - /* soft mmu support */ \ > - CPU_COMMON_TLB \ > +#define CPU_COMMON /* Nothing */ > + > +#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */ > > #endif > diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h > index d78041d7a0..09abd95008 100644 > --- a/include/exec/cpu_ldst.h > +++ b/include/exec/cpu_ldst.h > @@ -139,21 +139,21 @@ static inline target_ulong tlb_addr_write(const > CPUTLBEntry *entry) > static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, > target_ulong addr) > { > - uintptr_t size_mask = env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS; > + uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> > CPU_TLB_ENTRY_BITS; > > return (addr >> TARGET_PAGE_BITS) & size_mask; > } > > static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) > { > - return (env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS) + 1; > + return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; > } > > /* Find the TLB entry corresponding to the mmu_idx + address pair. 
*/ > static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, > target_ulong addr) > { > - return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; > + return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; > } > > #ifdef MMU_MODE0_SUFFIX > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index 23586f9974..c28b6b6328 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -76,7 +76,7 @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); > > static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) > { > - return env->tlb_mask[mmu_idx] + (1 << CPU_TLB_ENTRY_BITS); > + return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); > } > > static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, > @@ -91,14 +91,14 @@ static void tlb_dyn_init(CPUArchState *env) > int i; > > for (i = 0; i < NB_MMU_MODES; i++) { > - CPUTLBDesc *desc = &env->tlb_d[i]; > + CPUTLBDesc *desc = &env_tlb(env)->d[i]; > size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; > > tlb_window_reset(desc, get_clock_realtime(), 0); > desc->n_used_entries = 0; > - env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS; > - env->tlb_table[i] = g_new(CPUTLBEntry, n_entries); > - env->iotlb[i] = g_new(CPUIOTLBEntry, n_entries); > + env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); > + env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); > } > } > > @@ -144,7 +144,7 @@ static void tlb_dyn_init(CPUArchState *env) > */ > static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) > { > - CPUTLBDesc *desc = &env->tlb_d[mmu_idx]; > + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; > size_t old_size = tlb_n_entries(env, mmu_idx); > size_t rate; > size_t new_size = old_size; > @@ -187,14 +187,14 @@ static void tlb_mmu_resize_locked(CPUArchState *env, > int mmu_idx) > return; > } > > - g_free(env->tlb_table[mmu_idx]); > - g_free(env->iotlb[mmu_idx]); > + g_free(env_tlb(env)->f[mmu_idx].table); > + g_free(env_tlb(env)->d[mmu_idx].iotlb); > > tlb_window_reset(desc, now, 0); > /* desc->n_used_entries is cleared by the caller */ > - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; > - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); > - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); > + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); > + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); > /* > * If the allocations fail, try smaller sizes. We just freed some > * memory, so going back to half of new_size has a good chance of > working. > @@ -202,46 +202,47 @@ static void tlb_mmu_resize_locked(CPUArchState *env, > int mmu_idx) > * allocations to fail though, so we progressively reduce the allocation > * size, aborting if we cannot even allocate the smallest TLB we support. 
> */ > - while (env->tlb_table[mmu_idx] == NULL || env->iotlb[mmu_idx] == NULL) { > + while (env_tlb(env)->f[mmu_idx].table == NULL || > + env_tlb(env)->d[mmu_idx].iotlb == NULL) { > if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { > error_report("%s: %s", __func__, strerror(errno)); > abort(); > } > new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); > - env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; > + env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; > > - g_free(env->tlb_table[mmu_idx]); > - g_free(env->iotlb[mmu_idx]); > - env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); > - env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); > + g_free(env_tlb(env)->f[mmu_idx].table); > + g_free(env_tlb(env)->d[mmu_idx].iotlb); > + env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); > + env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); > } > } > > static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) > { > tlb_mmu_resize_locked(env, mmu_idx); > - memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx)); > - env->tlb_d[mmu_idx].n_used_entries = 0; > + memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); > + env_tlb(env)->d[mmu_idx].n_used_entries = 0; > } > > static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t > mmu_idx) > { > - env->tlb_d[mmu_idx].n_used_entries++; > + env_tlb(env)->d[mmu_idx].n_used_entries++; > } > > static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t > mmu_idx) > { > - env->tlb_d[mmu_idx].n_used_entries--; > + env_tlb(env)->d[mmu_idx].n_used_entries--; > } > > void tlb_init(CPUState *cpu) > { > CPUArchState *env = cpu->env_ptr; > > - qemu_spin_init(&env->tlb_c.lock); > + qemu_spin_init(&env_tlb(env)->c.lock); > > /* Ensure that cpu_reset performs a full flush. 
*/ > - env->tlb_c.dirty = ALL_MMUIDX_BITS; > + env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; > > tlb_dyn_init(env); > } > @@ -273,9 +274,9 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, > size_t *pelide) > CPU_FOREACH(cpu) { > CPUArchState *env = cpu->env_ptr; > > - full += atomic_read(&env->tlb_c.full_flush_count); > - part += atomic_read(&env->tlb_c.part_flush_count); > - elide += atomic_read(&env->tlb_c.elide_flush_count); > + full += atomic_read(&env_tlb(env)->c.full_flush_count); > + part += atomic_read(&env_tlb(env)->c.part_flush_count); > + elide += atomic_read(&env_tlb(env)->c.elide_flush_count); > } > *pfull = full; > *ppart = part; > @@ -285,10 +286,11 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, > size_t *pelide) > static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) > { > tlb_table_flush_by_mmuidx(env, mmu_idx); > - memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); > - env->tlb_d[mmu_idx].large_page_addr = -1; > - env->tlb_d[mmu_idx].large_page_mask = -1; > - env->tlb_d[mmu_idx].vindex = 0; > + env_tlb(env)->d[mmu_idx].large_page_addr = -1; > + env_tlb(env)->d[mmu_idx].large_page_mask = -1; > + env_tlb(env)->d[mmu_idx].vindex = 0; > + memset(env_tlb(env)->d[mmu_idx].vtable, -1, > + sizeof(env_tlb(env)->d[0].vtable)); > } > > static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data > data) > @@ -301,31 +303,31 @@ static void tlb_flush_by_mmuidx_async_work(CPUState > *cpu, run_on_cpu_data data) > > tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > > - all_dirty = env->tlb_c.dirty; > + all_dirty = env_tlb(env)->c.dirty; > to_clean = asked & all_dirty; > all_dirty &= ~to_clean; > - env->tlb_c.dirty = all_dirty; > + env_tlb(env)->c.dirty = all_dirty; > > for (work = to_clean; work != 0; work &= work - 1) { > int mmu_idx = ctz32(work); > tlb_flush_one_mmuidx_locked(env, mmu_idx); > } > > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > cpu_tb_jmp_cache_clear(cpu); > > if (to_clean == ALL_MMUIDX_BITS) { > - atomic_set(&env->tlb_c.full_flush_count, > - env->tlb_c.full_flush_count + 1); > + atomic_set(&env_tlb(env)->c.full_flush_count, > + env_tlb(env)->c.full_flush_count + 1); > } else { > - atomic_set(&env->tlb_c.part_flush_count, > - env->tlb_c.part_flush_count + ctpop16(to_clean)); > + atomic_set(&env_tlb(env)->c.part_flush_count, > + env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); > if (to_clean != asked) { > - atomic_set(&env->tlb_c.elide_flush_count, > - env->tlb_c.elide_flush_count + > + atomic_set(&env_tlb(env)->c.elide_flush_count, > + env_tlb(env)->c.elide_flush_count + > ctpop16(asked & ~to_clean)); > } > } > @@ -410,11 +412,12 @@ static inline bool tlb_flush_entry_locked(CPUTLBEntry > *tlb_entry, > static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, > target_ulong page) > { > + CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; > int k; > > assert_cpu_is_self(ENV_GET_CPU(env)); > for (k = 0; k < CPU_VTLB_SIZE; k++) { > - if (tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page)) { > + if (tlb_flush_entry_locked(&d->vtable[k], page)) { > tlb_n_used_entries_dec(env, mmu_idx); > } > } > @@ -423,8 +426,8 @@ static inline void > tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, > static void tlb_flush_page_locked(CPUArchState *env, int midx, > target_ulong page) > { > - target_ulong lp_addr = env->tlb_d[midx].large_page_addr; > - target_ulong lp_mask = 
env->tlb_d[midx].large_page_mask; > + target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; > + target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; > > /* Check if we need to flush due to large pages. */ > if ((page & lp_mask) == lp_addr) { > @@ -459,13 +462,13 @@ static void > tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, > tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", > addr, mmu_idx_bitmap); > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > if (test_bit(mmu_idx, &mmu_idx_bitmap)) { > tlb_flush_page_locked(env, mmu_idx, addr); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > tb_flush_jmp_cache(cpu, addr); > } > @@ -609,22 +612,22 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, > ram_addr_t length) > int mmu_idx; > > env = cpu->env_ptr; > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > unsigned int i; > unsigned int n = tlb_n_entries(env, mmu_idx); > > for (i = 0; i < n; i++) { > - tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, > - length); > + tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], > + start1, length); > } > > for (i = 0; i < CPU_VTLB_SIZE; i++) { > - tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], > start1, > - length); > + tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], > + start1, length); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Called with tlb_c.lock held */ > @@ -646,7 +649,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > assert_cpu_is_self(cpu); > > vaddr &= TARGET_PAGE_MASK; > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); > } > @@ -654,10 +657,10 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { > int k; > for (k = 0; k < CPU_VTLB_SIZE; k++) { > - tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); > + tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], > vaddr); > } > } > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Our TLB does not support large pages, so remember the area covered by > @@ -665,7 +668,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) > static void tlb_add_large_page(CPUArchState *env, int mmu_idx, > target_ulong vaddr, target_ulong size) > { > - target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr; > + target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; > target_ulong lp_mask = ~(size - 1); > > if (lp_addr == (target_ulong)-1) { > @@ -675,13 +678,13 @@ static void tlb_add_large_page(CPUArchState *env, int > mmu_idx, > /* Extend the existing region to include the new page. > This is a compromise between unnecessary flushes and > the cost of maintaining a full variable size TLB. 
*/ > - lp_mask &= env->tlb_d[mmu_idx].large_page_mask; > + lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; > while (((lp_addr ^ vaddr) & lp_mask) != 0) { > lp_mask <<= 1; > } > } > - env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask; > - env->tlb_d[mmu_idx].large_page_mask = lp_mask; > + env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; > + env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; > } > > /* Add a new TLB entry. At most one entry for a given virtual address > @@ -757,10 +760,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, > target_ulong vaddr, > * a longer critical section, but this is not a concern since the TLB > lock > * is unlikely to be contended. > */ > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > > /* Note that the tlb is no longer clean. */ > - env->tlb_c.dirty |= 1 << mmu_idx; > + env_tlb(env)->c.dirty |= 1 << mmu_idx; > > /* Make sure there's no cached translation for the new page. */ > tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); > @@ -770,12 +773,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, > target_ulong vaddr, > * different page; otherwise just overwrite the stale data. > */ > if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { > - unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE; > - CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; > + unsigned vidx = env_tlb(env)->d[mmu_idx].vindex++ % CPU_VTLB_SIZE; > + CPUTLBEntry *tv = &env_tlb(env)->d[mmu_idx].vtable[vidx]; > > /* Evict the old entry into the victim tlb. */ > copy_tlb_helper_locked(tv, te); > - env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; > + env_tlb(env)->d[mmu_idx].viotlb[vidx] = > env_tlb(env)->d[mmu_idx].iotlb[index]; > tlb_n_used_entries_dec(env, mmu_idx); > } > > @@ -792,8 +795,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong > vaddr, > * subtract here is that of the page base, and not the same as the > * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). > */ > - env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page; > - env->iotlb[mmu_idx][index].attrs = attrs; > + env_tlb(env)->d[mmu_idx].iotlb[index].addr = iotlb - vaddr_page; > + env_tlb(env)->d[mmu_idx].iotlb[index].attrs = attrs; > > /* Now calculate the new entry */ > tn.addend = addend - vaddr_page; > @@ -829,7 +832,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong > vaddr, > > copy_tlb_helper_locked(te, &tn); > tlb_n_used_entries_inc(env, mmu_idx); > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > } > > /* Add a new TLB entry, but without specifying the memory > @@ -996,7 +999,7 @@ static bool victim_tlb_hit(CPUArchState *env, size_t > mmu_idx, size_t index, > > assert_cpu_is_self(ENV_GET_CPU(env)); > for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { > - CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; > + CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; > target_ulong cmp; > > /* elt_ofs might correspond to .addr_write, so use atomic_read */ > @@ -1008,16 +1011,16 @@ static bool victim_tlb_hit(CPUArchState *env, size_t > mmu_idx, size_t index, > > if (cmp == page) { > /* Found entry in victim tlb, swap tlb and iotlb. 
*/ > - CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; > + CPUTLBEntry tmptlb, *tlb = > &env_tlb(env)->f[mmu_idx].table[index]; > > - qemu_spin_lock(&env->tlb_c.lock); > + qemu_spin_lock(&env_tlb(env)->c.lock); > copy_tlb_helper_locked(&tmptlb, tlb); > copy_tlb_helper_locked(tlb, vtlb); > copy_tlb_helper_locked(vtlb, &tmptlb); > - qemu_spin_unlock(&env->tlb_c.lock); > + qemu_spin_unlock(&env_tlb(env)->c.lock); > > - CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; > - CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; > + CPUIOTLBEntry tmpio, *io = > &env_tlb(env)->d[mmu_idx].iotlb[index]; > + CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; > tmpio = *io; *io = *vio; *vio = tmpio; > return true; > } > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c > index 1959046343..9bd23f5cae 100644 > --- a/target/arm/translate-a64.c > +++ b/target/arm/translate-a64.c > @@ -14163,7 +14163,7 @@ static bool is_guarded_page(CPUARMState *env, > DisasContext *s) > * table entry even for that case. > */ > return (tlb_hit(entry->addr_code, addr) && > - env->iotlb[mmu_idx][index].attrs.target_tlb_bit0); > + env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); > #endif > } > > diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c > index d57f9e500f..5e6af10faf 100644 > --- a/tcg/aarch64/tcg-target.inc.c > +++ b/tcg/aarch64/tcg-target.inc.c > @@ -1451,12 +1451,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool > is_ld, TCGMemOpIdx oi, > label->label_ptr[0] = label_ptr; > } > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* We expect to use a 24-bit unsigned offset from ENV. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0xffffff); > > /* Load and compare a TLB entry, emitting the conditional jump to the > @@ -1467,8 +1463,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg > addr_reg, TCGMemOp opc, > tcg_insn_unit **label_ptr, int mem_index, > bool is_read) > { > - int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_ofs = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_ofs = offsetof(CPUArchState, tlb_.f[mem_index].table); > unsigned a_bits = get_alignment_bits(opc); > unsigned s_bits = opc & MO_SIZE; > unsigned a_mask = (1u << a_bits) - 1; > diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c > index 2245a8aeb9..04c2eebb41 100644 > --- a/tcg/arm/tcg-target.inc.c > +++ b/tcg/arm/tcg-target.inc.c > @@ -1235,12 +1235,8 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg > argreg, > > #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* We expect to use a 20-bit unsigned offset from ENV. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0xfffff); > > /* Load and compare a TLB entry, leaving the flags set. Returns the register > @@ -1251,8 +1247,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg > addrlo, TCGReg addrhi, > { > int cmp_off = (is_load ? 
offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c > index e0670e5098..1bd33389c9 100644 > --- a/tcg/i386/tcg-target.inc.c > +++ b/tcg/i386/tcg-target.inc.c > @@ -1654,10 +1654,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, > TCGReg addrlo, TCGReg addrhi, > TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); > > tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0, > - offsetof(CPUArchState, tlb_mask[mem_index])); > + offsetof(CPUArchState, tlb_.f[mem_index].mask)); > > tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0, > - offsetof(CPUArchState, tlb_table[mem_index])); > + offsetof(CPUArchState, tlb_.f[mem_index].table)); > > /* If the required alignment is at least as large as the access, simply > copy the address and mask. For lesser alignments, check that we don't > diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c > index 8a92e916dd..b827579317 100644 > --- a/tcg/mips/tcg-target.inc.c > +++ b/tcg/mips/tcg-target.inc.c > @@ -1201,14 +1201,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int > i, TCGReg al, TCGReg ah) > return i; > } > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= 0x8000); > - > /* > * Perform the tlb comparison operation. > * The complete host address is placed in BASE. > @@ -1222,8 +1214,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg > base, TCGReg addrl, > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > int mem_index = get_mmuidx(oi); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > int add_off = offsetof(CPUTLBEntry, addend); > int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c > index 773690f1d9..1f717745c1 100644 > --- a/tcg/ppc/tcg-target.inc.c > +++ b/tcg/ppc/tcg-target.inc.c > @@ -1505,10 +1505,6 @@ static void * const qemu_st_helpers[16] = { > [MO_BEQ] = helper_be_stq_mmu, > }; > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > /* Perform the TLB load and compare. Places the result of the comparison > in CR7, loads the addend of the TLB into R3, and returns the register > containing the guest address (zero-extended into R4). Clobbers R0 and > R2. */ > @@ -1521,8 +1517,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp > opc, > = (is_read > ? 
offsetof(CPUTLBEntry, addr_read) > : offsetof(CPUTLBEntry, addr_write)); > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > unsigned s_bits = opc & MO_SIZE; > unsigned a_bits = get_alignment_bits(opc); > diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c > index b785f4acb7..c1f9c784bc 100644 > --- a/tcg/riscv/tcg-target.inc.c > +++ b/tcg/riscv/tcg-target.inc.c > @@ -961,14 +961,6 @@ static void * const qemu_st_helpers[16] = { > /* We don't support oversize guests */ > QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS); > > -/* We expect tlb_mask to be before tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= 0x800); > - > static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, > TCGReg addrh, TCGMemOpIdx oi, > tcg_insn_unit **label_ptr, bool is_load) > @@ -981,8 +973,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, > int mask_off, table_off; > TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; > > - mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > if (table_off > 0x7ff) { > int mask_hi = mask_off - sextreg(mask_off, 0, 12); > int table_hi = table_off - sextreg(table_off, 0, 12); > diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c > index 7db90b3bae..3a8794d9bd 100644 > --- a/tcg/s390/tcg-target.inc.c > +++ b/tcg/s390/tcg-target.inc.c > @@ -1538,9 +1538,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, > TCGMemOp opc, TCGReg data, > #include "tcg-ldst.inc.c" > > /* We're expecting to use a 20-bit signed offset on the tlb memory ops. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_mask[NB_MMU_MODES - 1]) > - > 0x7ffff); > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) > +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) > > 0x7ffff); > > /* Load and compare a TLB entry, leaving the flags set. Loads the TLB > @@ -1552,8 +1550,8 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg > addr_reg, TCGMemOp opc, > unsigned a_bits = get_alignment_bits(opc); > unsigned s_mask = (1 << s_bits) - 1; > unsigned a_mask = (1 << a_bits) - 1; > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > int ofs, a_off; > uint64_t tlb_mask; > > diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c > index 7a61839dc1..be10124e11 100644 > --- a/tcg/sparc/tcg-target.inc.c > +++ b/tcg/sparc/tcg-target.inc.c > @@ -1074,19 +1074,11 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int > count) > The result of the TLB comparison is in %[ix]cc. The sanitized address > is in the returned register, maybe %o0. The TLB addend is in %o1. */ > > -/* We expect tlb_mask to be before tlb_table. 
*/ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) < > - offsetof(CPUArchState, tlb_mask)); > - > -/* We expect tlb_mask to be "near" tlb_table. */ > -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) - > - offsetof(CPUArchState, tlb_mask) >= (1 << 13)); > - > static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, > TCGMemOp opc, int which) > { > - int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); > - int table_off = offsetof(CPUArchState, tlb_table[mem_index]); > + int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask); > + int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table); > TCGReg base = TCG_AREG0; > const TCGReg r0 = TCG_REG_O0; > const TCGReg r1 = TCG_REG_O1; > -- > 2.17.1 > >
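
As a rough illustration of what the new layout gives the fast path, here is a minimal, self-contained C sketch modelled on CPUTLBDescFast and the tlb_index()/tlb_entry() helpers from the patch. The type names, sizes and constants below are simplified stand-ins, not the real QEMU definitions.

/*
 * Standalone model of the CPUTLBDescFast fast path -- illustrative only.
 * Field names follow the patch; sizes and constants are made up here.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_BITS       12   /* stand-in for TARGET_PAGE_BITS */
#define TLB_ENTRY_BITS   4   /* log2(sizeof(TLBEntry)) in this model */

typedef struct {
    uint64_t addr_read;      /* page-aligned guest address, -1 when empty */
    uintptr_t addend;        /* guest-to-host offset for the hit case */
} TLBEntry;

typedef struct {
    uintptr_t mask;          /* (n_entries - 1) << TLB_ENTRY_BITS */
    TLBEntry *table;         /* dynamically sized array of entries */
} TLBDescFast;

/* Mirrors tlb_index()/tlb_entry(): both fields are read together on
 * every access, which is why the patch packs them side by side. */
static TLBEntry *lookup(const TLBDescFast *f, uint64_t addr)
{
    uintptr_t index = (addr >> PAGE_BITS) & (f->mask >> TLB_ENTRY_BITS);
    return &f->table[index];
}

int main(void)
{
    size_t n = 256;          /* arbitrary power-of-two table size */
    TLBDescFast f;

    f.mask = (n - 1) << TLB_ENTRY_BITS;
    f.table = malloc(n * sizeof(TLBEntry));
    memset(f.table, -1, n * sizeof(TLBEntry));   /* all entries "empty" */

    TLBEntry *e = lookup(&f, 0x40001234);
    printf("index %zu, addr_read 0x%" PRIx64 "\n",
           (size_t)(e - f.table), e->addr_read);

    free(f.table);
    return 0;
}

Keeping mask and table adjacent, and placing the f[] array first in CPUTLB, is what lets the patch retain each backend's QEMU_BUILD_BUG_ON check that offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table) still fits the target's small immediate offset range.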