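PowerPC TCG flushes the whole TLB on every change of the MSR IR/DR bits, which in practice means on every user<->kernel transition. Instead, use the 6-element TLB array as a cache, where each MMU index is mapped to a different state of the IR/DR/PR/HV bits, so that a flush (of a single MMU index) is needed only when a state that is not currently cached is entered.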
This brings the number of TLB flushes down from ~900000 to ~50000 for starting up the Debian installer, which is in line with x86 and gives a ~10% performance improvement. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- cputlb.c | 19 +++++++++++++++++ hw/ppc/spapr_hcall.c | 6 +++++- include/exec/exec-all.h | 5 +++++ target-ppc/cpu.h | 4 +++- target-ppc/excp_helper.c | 6 +----- target-ppc/helper_regs.h | 52 +++++++++++++++++++++++++++++++-------------- target-ppc/translate_init.c | 5 +++++ 7 files changed, 74 insertions(+), 23 deletions(-) diff --git a/cputlb.c b/cputlb.c index afd3705..17e1b03 100644 --- a/cputlb.c +++ b/cputlb.c @@ -67,6 +67,25 @@ void tlb_flush(CPUState *cpu, int flush_global) tlb_flush_count++; } +void tlb_flush_idx(CPUState *cpu, int mmu_idx) +{ + CPUArchState *env = cpu->env_ptr; + +#if defined(DEBUG_TLB) + printf("tlb_flush_idx %d:\n", mmu_idx); +#endif + /* must reset current TB so that interrupts cannot modify the + links while we are modifying them */ + cpu->current_tb = NULL; + + memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[mmu_idx])); + memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + + env->tlb_flush_addr = -1; + env->tlb_flush_mask = 0; + tlb_flush_count++; +} + static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) { if (addr == (tlb_entry->addr_read & diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 467858c..b95961c 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -556,13 +556,17 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); + bool flush; env->msr |= (1ULL << MSR_EE); - hreg_compute_hflags(env); + flush = hreg_compute_hflags(env); if (!cpu_has_work(cs)) { cs->halted = 1; cs->exception_index = EXCP_HLT; cs->exit_request = 1; + } else if (flush) { + cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + cs->exit_request = 1; } return H_SUCCESS; } diff --git 
a/include/exec/exec-all.h b/include/exec/exec-all.h index 5e5d86e..629a550 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -100,6 +100,7 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as); /* cputlb.c */ void tlb_flush_page(CPUState *cpu, target_ulong addr); void tlb_flush(CPUState *cpu, int flush_global); +void tlb_flush_idx(CPUState *cpu, int mmu_idx); void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size); @@ -112,6 +113,10 @@ static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) static inline void tlb_flush(CPUState *cpu, int flush_global) { } + +static inline void tlb_flush_idx(CPUState *cpu, int mmu_idx) +{ +} #endif #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index b64c652..c1cb27f 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -922,7 +922,7 @@ struct ppc_segment_page_sizes { /*****************************************************************************/ /* The whole PowerPC CPU context */ -#define NB_MMU_MODES 3 +#define NB_MMU_MODES 6 #define PPC_CPU_OPCODES_LEN 0x40 @@ -1085,6 +1085,8 @@ struct CPUPPCState { target_ulong hflags; /* hflags is a MSR & HFLAGS_MASK */ target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */ int mmu_idx; /* precomputed MMU index to speed up mem accesses */ + uint32_t mmu_msr[NB_MMU_MODES]; /* ir/dr/hv/pr values for TLBs */ + int mmu_fifo; /* for replacement in mmu_msr */ /* Power management */ int (*check_pow)(CPUPPCState *env); diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index be71590..bf25d44 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) if (env->spr[SPR_LPCR] & LPCR_AIL) { new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) { - /* If we 
disactivated any translation, flush TLBs */ - tlb_flush(cs, 1); } #ifdef TARGET_PPC64 @@ -678,8 +675,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) if ((env->mmu_model == POWERPC_MMU_BOOKE) || (env->mmu_model == POWERPC_MMU_BOOKE206)) { /* XXX: The BookE changes address space when switching modes, - we should probably implement that as different MMU indexes, - but for the moment we do it the slow way and flush all. */ + TODO: still needed?!? */ tlb_flush(cs, 1); } } diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 271fddf..291f9c1 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -39,17 +39,38 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env) env->tgpr[3] = tmp; } -static inline void hreg_compute_mem_idx(CPUPPCState *env) +static inline bool hreg_compute_mem_idx(CPUPPCState *env) { - /* Precompute MMU index */ - if (msr_pr == 0 && msr_hv != 0) { - env->mmu_idx = 2; - } else { - env->mmu_idx = 1 - msr_pr; + CPUState *cs = CPU(ppc_env_get_cpu(env)); + int msr = env->msr; + int i; + + if (!tcg_enabled()) { + return false; + } + + msr &= (1 << MSR_IR) | (1 << MSR_DR) | (1 << MSR_PR) | MSR_HVB; + if (msr_pr == 1) { + msr &= ~MSR_HVB; } + + for (i = 0; i < NB_MMU_MODES; i++) { + if (env->mmu_msr[i] == msr) { + env->mmu_idx = i; + return false; + } + } + + /* Use a new index with FIFO replacement. */ + i = (env->mmu_fifo == NB_MMU_MODES - 1 ? 
0 : env->mmu_fifo + 1); + env->mmu_fifo = i; + env->mmu_msr[i] = msr; + env->mmu_idx = i; + tlb_flush_idx(cs, i); + return true; } -static inline void hreg_compute_hflags(CPUPPCState *env) +static inline bool hreg_compute_hflags(CPUPPCState *env) { target_ulong hflags_mask; @@ -58,10 +79,10 @@ static inline void hreg_compute_hflags(CPUPPCState *env) (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) | (1 << MSR_LE) | (1 << MSR_VSX); hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB; - hreg_compute_mem_idx(env); env->hflags = env->msr & hflags_mask; /* Merge with hflags coming from other registers */ env->hflags |= env->hflags_nmsr; + return hreg_compute_mem_idx(env); } static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, @@ -80,13 +101,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, value &= ~MSR_HVB; value |= env->msr & MSR_HVB; } - if (((value >> MSR_IR) & 1) != msr_ir || - ((value >> MSR_DR) & 1) != msr_dr) { - /* Flush all tlb when changing translation mode */ - tlb_flush(cs, 1); - excp = POWERPC_EXCP_NONE; - cs->interrupt_request |= CPU_INTERRUPT_EXITTB; - } if (unlikely((env->flags & POWERPC_FLAG_TGPR) && ((value ^ env->msr) & (1 << MSR_TGPR)))) { /* Swap temporary saved registers with GPRs */ @@ -98,7 +112,13 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, } #endif env->msr = value; - hreg_compute_hflags(env); + if (hreg_compute_hflags(env)) { +#if !defined(CONFIG_USER_ONLY) + /* TLB was flushed, exit the current translation block. 
*/ + excp = POWERPC_EXCP_NONE; + cs->interrupt_request |= CPU_INTERRUPT_EXITTB; +#endif + } #if !defined(CONFIG_USER_ONLY) if (unlikely(msr_pow == 1)) { if (!env->pending_interrupts && (*env->check_pow)(env)) { diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 48177ed..1c2ded9 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -9472,6 +9472,7 @@ static void ppc_cpu_reset(CPUState *s) /* XXX: find a suitable condition to enable the hypervisor mode */ msr |= (target_ulong)MSR_HVB; } + msr |= (target_ulong)0 << MSR_AP; /* TO BE CHECKED */ msr |= (target_ulong)0 << MSR_SA; /* TO BE CHECKED */ msr |= (target_ulong)1 << MSR_EP; @@ -9504,6 +9505,10 @@ static void ppc_cpu_reset(CPUState *s) } #endif + for (i = 1; i < NB_MMU_MODES; i++) { + env->mmu_msr[i] = -1; + } + hreg_store_msr(env, msr, 1); #if !defined(CONFIG_USER_ONLY) -- 1.8.3.1