Re: [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats
On 10/07/2017 12:03, Alex Bennée wrote: >> +extern bool enable_instrumentation; >> + > Is there a better place for this than a static global? I was pondering > tcg_ctx but that's not really visible to the runtime. Making it part of > the TB flags might be useful for only instrumenting certain segments of > the code but I suspect I'm bike-shedding at this point. Why not use tracepoints? This seems to be a natural candidate for systemtap-like tracing scripts. Paolo
Re: [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats
Pranith Kumarwrites: > I used the following patch to collect hit/miss TLB ratios for a few > benchmarks. The results can be found here: http://imgur.com/a/gee1o > > Please note that these results also include boot/shutdown as the > per-region instrumentation patch came later. > > Signed-off-by: Pranith Kumar > --- > accel/tcg/cputlb.c| 12 > cpus.c| 26 ++ > include/exec/cpu-defs.h | 4 > include/sysemu/cpus.h | 2 ++ > target/arm/helper.c | 6 +- > tcg/i386/tcg-target.inc.c | 16 ++-- > vl.c | 3 +++ > 7 files changed, 66 insertions(+), 3 deletions(-) > > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index ef52a7e5e0..2ac2397431 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -864,12 +864,19 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry > *iotlbentry, > } > } > > +extern bool enable_instrumentation; > + Is there a better place for this than a static global? I was pondering tcg_ctx but that's not really visible to the runtime. Making it part of the TB flags might be useful for only instrumenting certain segments of the code but I suspect I'm bike-shedding at this point. > /* Return true if ADDR is present in the victim tlb, and has been copied > back to the main tlb. */ > static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, > size_t elt_ofs, target_ulong page) > { > size_t vidx; > + > +if (enable_instrumentation) { > +env->tlb_access_victim++; > +} > + > for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { > CPUTLBEntry *vtlb = >tlb_v_table[mmu_idx][vidx]; > target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); > @@ -885,6 +892,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t > mmu_idx, size_t index, > CPUIOTLBEntry tmpio, *io = >iotlb[mmu_idx][index]; > CPUIOTLBEntry *vio = >iotlb_v[mmu_idx][vidx]; > tmpio = *io; *io = *vio; *vio = tmpio; > + > +if (enable_instrumentation) { > +env->tlb_access_victim_hit++; > +} > + > return true; > } > } > diff --git a/cpus.c b/cpus.c > index 14bb8d552e..14669b3469 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1602,6 +1602,32 @@ static bool all_vcpus_paused(void) > return true; > } > > +void print_tlb_stats(void) > +{ > +CPUState *cpu; > +CPU_FOREACH(cpu) { > +CPUArchState *cs = cpu->env_ptr; > + > +fprintf(stderr, "TLB accesses %lu, hits %lu, victim accesses %lu, > hits %lu\n", > +cs->tlb_access_total, cs->tlb_access_hit, > cs->tlb_access_victim, > +cs->tlb_access_victim_hit); > +} > +} > + > +void clear_tlb_stats(void) > +{ > +CPUState *cpu; > +CPU_FOREACH(cpu) { > +CPUArchState *cs = cpu->env_ptr; > + > +cs->tlb_access_total= 0; > +cs->tlb_access_hit = 0; > +cs->tlb_access_victim = 0; > +cs->tlb_access_victim = 0; Duplicate line here. > +cs->tlb_access_victim_hit = 0; > +} > +} > + > void pause_all_vcpus(void) > { > CPUState *cpu; > diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h > index 5f4e303635..29b3c2ada8 100644 > --- a/include/exec/cpu-defs.h > +++ b/include/exec/cpu-defs.h > @@ -138,6 +138,10 @@ typedef struct CPUIOTLBEntry { > target_ulong tlb_flush_addr;\ > target_ulong tlb_flush_mask;\ > target_ulong vtlb_index;\ > +target_ulong tlb_access_hit;\ > +target_ulong tlb_access_total; \ > +target_ulong tlb_access_victim; \ > +target_ulong tlb_access_victim_hit; \ > > #else > > diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h > index 731756d948..7d8d92646c 100644 > --- a/include/sysemu/cpus.h > +++ b/include/sysemu/cpus.h > @@ -10,6 +10,8 @@ void resume_all_vcpus(void); > void pause_all_vcpus(void); > void cpu_stop_current(void); > void cpu_ticks_init(void); > +void print_tlb_stats(void); > +void clear_tlb_stats(void); > > void configure_icount(QemuOpts *opts, Error **errp); > extern int use_icount; > diff --git a/target/arm/helper.c b/target/arm/helper.c > index dfbf03676c..d2e75b0f20 100644 > --- a/target/arm/helper.c > +++ b/target/arm/helper.c > @@ -1124,7 +1124,9 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const > ARMCPRegInfo *ri) > } > } > > -bool enable_instrumentation; > +extern bool enable_instrumentation; > +extern void print_tlb_stats(void); > +extern void clear_tlb_stats(void); > > static void
[Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats
I used the following patch to collect hit/miss TLB ratios for a few benchmarks. The results can be found here: http://imgur.com/a/gee1o Please note that these results also include boot/shutdown as the per-region instrumentation patch came later. Signed-off-by: Pranith Kumar--- accel/tcg/cputlb.c| 12 cpus.c| 26 ++ include/exec/cpu-defs.h | 4 include/sysemu/cpus.h | 2 ++ target/arm/helper.c | 6 +- tcg/i386/tcg-target.inc.c | 16 ++-- vl.c | 3 +++ 7 files changed, 66 insertions(+), 3 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index ef52a7e5e0..2ac2397431 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -864,12 +864,19 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } } +extern bool enable_instrumentation; + /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, size_t elt_ofs, target_ulong page) { size_t vidx; + +if (enable_instrumentation) { +env->tlb_access_victim++; +} + for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = >tlb_v_table[mmu_idx][vidx]; target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); @@ -885,6 +892,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, CPUIOTLBEntry tmpio, *io = >iotlb[mmu_idx][index]; CPUIOTLBEntry *vio = >iotlb_v[mmu_idx][vidx]; tmpio = *io; *io = *vio; *vio = tmpio; + +if (enable_instrumentation) { +env->tlb_access_victim_hit++; +} + return true; } } diff --git a/cpus.c b/cpus.c index 14bb8d552e..14669b3469 100644 --- a/cpus.c +++ b/cpus.c @@ -1602,6 +1602,32 @@ static bool all_vcpus_paused(void) return true; } +void print_tlb_stats(void) +{ +CPUState *cpu; +CPU_FOREACH(cpu) { +CPUArchState *cs = cpu->env_ptr; + +fprintf(stderr, "TLB accesses %lu, hits %lu, victim accesses %lu, hits %lu\n", +cs->tlb_access_total, cs->tlb_access_hit, cs->tlb_access_victim, +cs->tlb_access_victim_hit); +} +} + +void clear_tlb_stats(void) +{ +CPUState *cpu; +CPU_FOREACH(cpu) { +CPUArchState *cs = cpu->env_ptr; + +cs->tlb_access_total= 0; +cs->tlb_access_hit = 0; +cs->tlb_access_victim = 0; +cs->tlb_access_victim = 0; +cs->tlb_access_victim_hit = 0; +} +} + void pause_all_vcpus(void) { CPUState *cpu; diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 5f4e303635..29b3c2ada8 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -138,6 +138,10 @@ typedef struct CPUIOTLBEntry { target_ulong tlb_flush_addr;\ target_ulong tlb_flush_mask;\ target_ulong vtlb_index;\ +target_ulong tlb_access_hit;\ +target_ulong tlb_access_total; \ +target_ulong tlb_access_victim; \ +target_ulong tlb_access_victim_hit; \ #else diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 731756d948..7d8d92646c 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -10,6 +10,8 @@ void resume_all_vcpus(void); void pause_all_vcpus(void); void cpu_stop_current(void); void cpu_ticks_init(void); +void print_tlb_stats(void); +void clear_tlb_stats(void); void configure_icount(QemuOpts *opts, Error **errp); extern int use_icount; diff --git a/target/arm/helper.c b/target/arm/helper.c index dfbf03676c..d2e75b0f20 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1124,7 +1124,9 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) } } -bool enable_instrumentation; +extern bool enable_instrumentation; +extern void print_tlb_stats(void); +extern void clear_tlb_stats(void); static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -1139,6 +1141,8 @@ static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, } else if (value == 0xfa11dead) { printf("Disabling instrumentation\n"); enable_instrumentation = false; +print_tlb_stats(); +clear_tlb_stats(); tb_flush(cs); } diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 9d7d25c017..b75bd54c35 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1250,6 +1250,8 @@ static void *