Richard Henderson <richard.hender...@linaro.org> writes:
> The primary motivation is to remove a dozen insns along
> the fast-path in tb_lookup. As a byproduct, this allows
> us to completely remove parallel_cpus.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  accel/tcg/tcg-accel-ops.h       |  1 +
>  include/exec/exec-all.h         |  7 +------
>  include/hw/core/cpu.h           |  2 ++
>  accel/tcg/cpu-exec.c            |  3 ---
>  accel/tcg/tcg-accel-ops-mttcg.c |  3 +--
>  accel/tcg/tcg-accel-ops-rr.c    |  2 +-
>  accel/tcg/tcg-accel-ops.c       |  8 ++++++++
>  accel/tcg/translate-all.c       |  4 ----
>  linux-user/main.c               |  1 +
>  linux-user/sh4/signal.c         |  8 +++++---
>  linux-user/syscall.c            | 18 ++++++++++--------
>  11 files changed, 30 insertions(+), 27 deletions(-)
>
> diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
> index 48130006de..6a5fcef889 100644
> --- a/accel/tcg/tcg-accel-ops.h
> +++ b/accel/tcg/tcg-accel-ops.h
> @@ -17,5 +17,6 @@
>  void tcg_cpus_destroy(CPUState *cpu);
>  int tcg_cpus_exec(CPUState *cpu);
>  void tcg_handle_interrupt(CPUState *cpu, int mask);
> +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
>
>  #endif /* TCG_CPUS_H */
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 75f8c3981a..310f474540 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -510,8 +510,6 @@ struct TranslationBlock {
>      uintptr_t jmp_dest[2];
>  };
>
> -extern bool parallel_cpus;
> -
>  /* Hide the qatomic_read to make code a little easier on the eyes */
>  static inline uint32_t tb_cflags(const TranslationBlock *tb)
>  {
> @@ -521,10 +519,7 @@ static inline uint32_t tb_cflags(const TranslationBlock
> *tb)
>  /* current cflags for hashing/comparison */
>  static inline uint32_t curr_cflags(CPUState *cpu)
>  {
> -    uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index);
> -    cflags |= parallel_cpus ? CF_PARALLEL : 0;
> -    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
> -    return cflags;
> +    return cpu->tcg_cflags;
>  }
>
>  /* TranslationBlock invalidate API */
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index c005d3dc2d..c68bc3ba8a 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -282,6 +282,7 @@ struct qemu_work_item;
>   *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
>   *   be the same as the cluster-id property of the CPU object's
> TYPE_CPU_CLUSTER
>   *   QOM parent.
> + * @tcg_cflags: Pre-computed cflags for this cpu.
>   * @nr_cores: Number of cores within this CPU package.
>   * @nr_threads: Number of threads within this CPU.
>   * @running: #true if CPU is currently running (lockless).
> @@ -412,6 +413,7 @@ struct CPUState {
>      /* TODO Move common fields from CPUArchState here. */
>      int cpu_index;
>      int cluster_index;
> +    uint32_t tcg_cflags;
>      uint32_t halted;
>      uint32_t can_do_io;
>      int32_t exception_index;
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 931da96c2b..bdfa036ac8 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
>          mmap_unlock();
>      }
>
> -    /* Since we got here, we know that parallel_cpus must be true. */
> -    parallel_cpus = false;
>      cpu_exec_enter(cpu);
>      /* execute the generated code */
>      trace_exec_tb(tb, pc);
> @@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
>       * the execution.
>       */
>      g_assert(cpu_in_exclusive_context(cpu));
> -    parallel_cpus = true;
>      cpu->running = false;
>      end_exclusive();

I don't see where we generate non-parallel aware code. Do we not care
about it anymore?

Anyway just an observation:

Reviewed-by: Alex Bennée <alex.ben...@linaro.org>

--
Alex Bennée