Re: [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats

2017-07-10 Thread Paolo Bonzini
On 10/07/2017 12:03, Alex Bennée wrote:
>> +extern bool enable_instrumentation;
>> +
> Is there a better place for this than a static global? I was pondering
> tcg_ctx but that's not really visible to the runtime. Making it part of
> the TB flags might be useful for only instrumenting certain segments of
> the code but I suspect I'm bike-shedding at this point.

Why not use tracepoints?  This seems to be a natural candidate for
systemtap-like tracing scripts.

Paolo



Re: [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats

2017-07-10 Thread Alex Bennée

Pranith Kumar  writes:

> I used the following patch to collect hit/miss TLB ratios for a few
> benchmarks. The results can be found here: http://imgur.com/a/gee1o
>
> Please note that these results also include boot/shutdown as the
> per-region instrumentation patch came later.
>
> Signed-off-by: Pranith Kumar 
> ---
>  accel/tcg/cputlb.c| 12 
>  cpus.c| 26 ++
>  include/exec/cpu-defs.h   |  4 
>  include/sysemu/cpus.h |  2 ++
>  target/arm/helper.c   |  6 +-
>  tcg/i386/tcg-target.inc.c | 16 ++--
>  vl.c  |  3 +++
>  7 files changed, 66 insertions(+), 3 deletions(-)
>
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index ef52a7e5e0..2ac2397431 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -864,12 +864,19 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry 
> *iotlbentry,
>  }
>  }
>
> +extern bool enable_instrumentation;
> +

Is there a better place for this than a static global? I was pondering
tcg_ctx but that's not really visible to the runtime. Making it part of
the TB flags might be useful for only instrumenting certain segments of
the code but I suspect I'm bike-shedding at this point.

>  /* Return true if ADDR is present in the victim tlb, and has been copied
> back to the main tlb.  */
>  static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
> size_t elt_ofs, target_ulong page)
>  {
>  size_t vidx;
> +
> +if (enable_instrumentation) {
> +env->tlb_access_victim++;
> +}
> +
>  for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
>  CPUTLBEntry *vtlb = >tlb_v_table[mmu_idx][vidx];
>  target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
> @@ -885,6 +892,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
> mmu_idx, size_t index,
>  CPUIOTLBEntry tmpio, *io = >iotlb[mmu_idx][index];
>  CPUIOTLBEntry *vio = >iotlb_v[mmu_idx][vidx];
>  tmpio = *io; *io = *vio; *vio = tmpio;
> +
> +if (enable_instrumentation) {
> +env->tlb_access_victim_hit++;
> +}
> +
>  return true;
>  }
>  }
> diff --git a/cpus.c b/cpus.c
> index 14bb8d552e..14669b3469 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1602,6 +1602,32 @@ static bool all_vcpus_paused(void)
>  return true;
>  }
>
> +void print_tlb_stats(void)
> +{
> +CPUState *cpu;
> +CPU_FOREACH(cpu) {
> +CPUArchState *cs = cpu->env_ptr;
> +
> +fprintf(stderr, "TLB accesses %lu, hits %lu, victim accesses %lu, 
> hits %lu\n",
> +cs->tlb_access_total, cs->tlb_access_hit, 
> cs->tlb_access_victim,
> +cs->tlb_access_victim_hit);
> +}
> +}
> +
> +void clear_tlb_stats(void)
> +{
> +CPUState *cpu;
> +CPU_FOREACH(cpu) {
> +CPUArchState *cs = cpu->env_ptr;
> +
> +cs->tlb_access_total= 0;
> +cs->tlb_access_hit  = 0;
> +cs->tlb_access_victim   = 0;
> +cs->tlb_access_victim   = 0;

Duplicate line here.

> +cs->tlb_access_victim_hit   = 0;
> +}
> +}
> +
>  void pause_all_vcpus(void)
>  {
>  CPUState *cpu;
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 5f4e303635..29b3c2ada8 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -138,6 +138,10 @@ typedef struct CPUIOTLBEntry {
>  target_ulong tlb_flush_addr;\
>  target_ulong tlb_flush_mask;\
>  target_ulong vtlb_index;\
> +target_ulong tlb_access_hit;\
> +target_ulong tlb_access_total;  \
> +target_ulong tlb_access_victim; \
> +target_ulong tlb_access_victim_hit; \
>
>  #else
>
> diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
> index 731756d948..7d8d92646c 100644
> --- a/include/sysemu/cpus.h
> +++ b/include/sysemu/cpus.h
> @@ -10,6 +10,8 @@ void resume_all_vcpus(void);
>  void pause_all_vcpus(void);
>  void cpu_stop_current(void);
>  void cpu_ticks_init(void);
> +void print_tlb_stats(void);
> +void clear_tlb_stats(void);
>
>  void configure_icount(QemuOpts *opts, Error **errp);
>  extern int use_icount;
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index dfbf03676c..d2e75b0f20 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -1124,7 +1124,9 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const 
> ARMCPRegInfo *ri)
>  }
>  }
>
> -bool enable_instrumentation;
> +extern bool enable_instrumentation;
> +extern void print_tlb_stats(void);
> +extern void clear_tlb_stats(void);
>
>  static void 

[Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats

2017-06-27 Thread Pranith Kumar
I used the following patch to collect hit/miss TLB ratios for a few
benchmarks. The results can be found here: http://imgur.com/a/gee1o

Please note that these results also include boot/shutdown as the
per-region instrumentation patch came later.

Signed-off-by: Pranith Kumar 
---
 accel/tcg/cputlb.c| 12 
 cpus.c| 26 ++
 include/exec/cpu-defs.h   |  4 
 include/sysemu/cpus.h |  2 ++
 target/arm/helper.c   |  6 +-
 tcg/i386/tcg-target.inc.c | 16 ++--
 vl.c  |  3 +++
 7 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index ef52a7e5e0..2ac2397431 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -864,12 +864,19 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry 
*iotlbentry,
 }
 }
 
+extern bool enable_instrumentation;
+
 /* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb.  */
 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
size_t elt_ofs, target_ulong page)
 {
 size_t vidx;
+
+if (enable_instrumentation) {
+env->tlb_access_victim++;
+}
+
 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
 CPUTLBEntry *vtlb = >tlb_v_table[mmu_idx][vidx];
 target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
@@ -885,6 +892,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
 CPUIOTLBEntry tmpio, *io = >iotlb[mmu_idx][index];
 CPUIOTLBEntry *vio = >iotlb_v[mmu_idx][vidx];
 tmpio = *io; *io = *vio; *vio = tmpio;
+
+if (enable_instrumentation) {
+env->tlb_access_victim_hit++;
+}
+
 return true;
 }
 }
diff --git a/cpus.c b/cpus.c
index 14bb8d552e..14669b3469 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1602,6 +1602,32 @@ static bool all_vcpus_paused(void)
 return true;
 }
 
+void print_tlb_stats(void)
+{
+CPUState *cpu;
+CPU_FOREACH(cpu) {
+CPUArchState *cs = cpu->env_ptr;
+
+fprintf(stderr, "TLB accesses %lu, hits %lu, victim accesses %lu, hits 
%lu\n",
+cs->tlb_access_total, cs->tlb_access_hit, 
cs->tlb_access_victim,
+cs->tlb_access_victim_hit);
+}
+}
+
+void clear_tlb_stats(void)
+{
+CPUState *cpu;
+CPU_FOREACH(cpu) {
+CPUArchState *cs = cpu->env_ptr;
+
+cs->tlb_access_total= 0;
+cs->tlb_access_hit  = 0;
+cs->tlb_access_victim   = 0;
+cs->tlb_access_victim   = 0;
+cs->tlb_access_victim_hit   = 0;
+}
+}
+
 void pause_all_vcpus(void)
 {
 CPUState *cpu;
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 5f4e303635..29b3c2ada8 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -138,6 +138,10 @@ typedef struct CPUIOTLBEntry {
 target_ulong tlb_flush_addr;\
 target_ulong tlb_flush_mask;\
 target_ulong vtlb_index;\
+target_ulong tlb_access_hit;\
+target_ulong tlb_access_total;  \
+target_ulong tlb_access_victim; \
+target_ulong tlb_access_victim_hit; \
 
 #else
 
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 731756d948..7d8d92646c 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -10,6 +10,8 @@ void resume_all_vcpus(void);
 void pause_all_vcpus(void);
 void cpu_stop_current(void);
 void cpu_ticks_init(void);
+void print_tlb_stats(void);
+void clear_tlb_stats(void);
 
 void configure_icount(QemuOpts *opts, Error **errp);
 extern int use_icount;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index dfbf03676c..d2e75b0f20 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1124,7 +1124,9 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const 
ARMCPRegInfo *ri)
 }
 }
 
-bool enable_instrumentation;
+extern bool enable_instrumentation;
+extern void print_tlb_stats(void);
+extern void clear_tlb_stats(void);
 
 static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 uint64_t value)
@@ -1139,6 +1141,8 @@ static void pmuserenr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 } else if (value == 0xfa11dead) {
 printf("Disabling instrumentation\n");
 enable_instrumentation = false;
+print_tlb_stats();
+clear_tlb_stats();
 tb_flush(cs);
 }
 
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 9d7d25c017..b75bd54c35 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1250,6 +1250,8 @@ static void *