This reads the timebase at various points in the real-mode guest
entry/exit code and uses that to accumulate total, minimum and
maximum time spent in those parts of the code.  Currently these
times are accumulated per vcpu in 5 parts of the code:

* rm_entry - time taken from the start of kvmppc_hv_entry() until
  just before entering the guest.
* rm_intr - time from when we take a hypervisor interrupt in the
  guest until we either re-enter the guest or decide to exit to the
  host.  This includes time spent handling hcalls in real mode.
* rm_exit - time from when we decide to exit the guest until the
  return from kvmppc_hv_entry().
* guest - time spend in the guest
* cede - time spent napping in real mode due to an H_CEDE hcall
  while other threads in the same vcore are active.

These times are exposed in debugfs in a directory per vcpu that
contains a file called "timings".  This file contains one line for
each of the 5 timings above, with the name followed by a colon and
4 numbers, which are the count (number of times the code has been
executed), the total time, the minimum time, and the maximum time,
all in nanoseconds.

Signed-off-by: Paul Mackerras <[email protected]>
---
 arch/powerpc/include/asm/kvm_host.h     |  19 +++++
 arch/powerpc/include/asm/time.h         |   3 +
 arch/powerpc/kernel/asm-offsets.c       |  11 +++
 arch/powerpc/kernel/time.c              |   6 ++
 arch/powerpc/kvm/book3s_hv.c            | 135 ++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 105 ++++++++++++++++++++++++-
 6 files changed, 276 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index f1d0bbc..286c0ce 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -369,6 +369,14 @@ struct kvmppc_slb {
        u8 base_page_size;      /* MMU_PAGE_xxx */
 };
 
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+       u64     seqcount;       /* used to synchronize access, also count * 2 */
+       u64     tb_total;       /* total time in timebase ticks */
+       u64     tb_min;         /* min time */
+       u64     tb_max;         /* max time */
+};
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM   2
 #define KVMPPC_BOOKE_DAC_NUM   2
@@ -656,6 +664,17 @@ struct kvm_vcpu_arch {
        u64 busy_preempt;
 
        u32 emul_inst;
+
+       struct kvmhv_tb_accumulator *cur_activity;      /* What we're timing */
+       u64     cur_tb_start;                   /* when it started */
+       struct kvmhv_tb_accumulator rm_entry;   /* real-mode entry code */
+       struct kvmhv_tb_accumulator rm_intr;    /* real-mode intr handling */
+       struct kvmhv_tb_accumulator rm_exit;    /* real-mode exit code */
+       struct kvmhv_tb_accumulator guest_time; /* guest execution */
+       struct kvmhv_tb_accumulator cede_time;  /* time napping inside guest */
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_timings;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 03cbada..10fc784 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 4717859..ec9f59c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -458,6 +458,17 @@ int main(void)
        DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
        DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
        DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+       DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+       DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+       DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+       DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+       DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+       DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+       DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, 
arch.cur_tb_start));
+       DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+       DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+       DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+       DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
 #endif
        DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
        DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2d7b33f..56f4484 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
 }
 #endif
 
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+       return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  *
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6631b53..8517c33 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1411,6 +1411,127 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct 
kvm *kvm, int core)
        return vcore;
 }
 
+static struct debugfs_timings_element {
+       const char *name;
+       size_t offset;
+} timings[] = {
+       {"rm_entry",    offsetof(struct kvm_vcpu, arch.rm_entry)},
+       {"rm_intr",     offsetof(struct kvm_vcpu, arch.rm_intr)},
+       {"rm_exit",     offsetof(struct kvm_vcpu, arch.rm_exit)},
+       {"guest",       offsetof(struct kvm_vcpu, arch.guest_time)},
+       {"cede",        offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS      (sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+       struct kvm_vcpu *vcpu;
+       unsigned int    buflen;
+       char            buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+       struct kvm_vcpu *vcpu = inode->i_private;
+       struct debugfs_timings_state *p;
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       kvm_get_kvm(vcpu->kvm);
+       p->vcpu = vcpu;
+       file->private_data = p;
+
+       return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+       struct debugfs_timings_state *p = file->private_data;
+
+       kvm_put_kvm(p->vcpu->kvm);
+       kfree(p);
+       return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+                                   size_t len, loff_t *ppos)
+{
+       struct debugfs_timings_state *p = file->private_data;
+       struct kvm_vcpu *vcpu = p->vcpu;
+       char *s;
+       struct kvmhv_tb_accumulator tb;
+       u64 count;
+       loff_t pos;
+       ssize_t n;
+       int i, loops;
+       bool ok;
+
+       if (!p->buflen) {
+               s = p->buf;
+               for (i = 0; i < N_TIMINGS; ++i) {
+                       struct kvmhv_tb_accumulator *acc;
+
+                       acc = (struct kvmhv_tb_accumulator *)
+                               ((unsigned long)vcpu + timings[i].offset);
+                       ok = false;
+                       for (loops = 0; loops < 1000; ++loops) {
+                               count = acc->seqcount;
+                               if (!(count & 1)) {
+                                       smp_rmb();
+                                       tb = *acc;
+                                       smp_rmb();
+                                       if (count == acc->seqcount) {
+                                               ok = true;
+                                               break;
+                                       }
+                               }
+                               udelay(1);
+                       }
+                       if (!ok)
+                               sprintf(s, "%s: stuck\n", timings[i].name);
+                       else
+                               sprintf(s, "%s: %llu %llu %llu %llu\n",
+                                       timings[i].name, count / 2,
+                                       tb_to_ns(tb.tb_total),
+                                       tb_to_ns(tb.tb_min),
+                                       tb_to_ns(tb.tb_max));
+                       s += strlen(s);
+               }
+               p->buflen = s - p->buf;
+       }
+
+       pos = *ppos;
+       if (pos >= p->buflen)
+               return 0;
+       if (len > p->buflen - pos)
+               len = p->buflen - pos;
+       n = copy_to_user(buf, p->buf + pos, len);
+       if (n) {
+               if (n == len)
+                       return -EFAULT;
+               len -= n;
+       }
+       *ppos = pos + len;
+       return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+                                    size_t len, loff_t *ppos)
+{
+       return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+       .owner   = THIS_MODULE,
+       .open    = debugfs_timings_open,
+       .release = debugfs_timings_release,
+       .read    = debugfs_timings_read,
+       .write   = debugfs_timings_write,
+       .llseek  = generic_file_llseek,
+};
+
 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
                                                   unsigned int id)
 {
@@ -1418,6 +1539,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct 
kvm *kvm,
        int err = -EINVAL;
        int core;
        struct kvmppc_vcore *vcore;
+       char buf[16];
 
        core = id / threads_per_subcore;
        if (core >= KVM_MAX_VCORES)
@@ -1480,6 +1602,19 @@ static struct kvm_vcpu 
*kvmppc_core_vcpu_create_hv(struct kvm *kvm,
        vcpu->arch.cpu_type = KVM_CPU_3S_64;
        kvmppc_sanity_check(vcpu);
 
+       /* Create a debugfs directory for the vcpu */
+       sprintf(buf, "vcpu%u", id);
+       if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) {
+               vcpu->arch.debugfs_dir = debugfs_create_dir(buf,
+                                               kvm->arch.debugfs_dir);
+               if (!IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) {
+                       vcpu->arch.debugfs_timings =
+                               debugfs_create_file("timings", 0444,
+                                                   vcpu->arch.debugfs_dir,
+                                                   vcpu, &debugfs_timings_ops);
+               }
+       }
+
        return vcpu;
 
 free_vcpu:
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0814ca1..d71ae2f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -225,9 +225,14 @@ kvm_novcpu_wakeup:
        /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
        ld      r4, HSTATE_KVM_VCPU(r13)
        cmpdi   r4, 0
-       bne     kvmppc_got_guest
+       beq     kvmppc_primary_no_guest
+
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+       b       kvmppc_got_guest
 
 kvm_novcpu_exit:
+       ld      r4, HSTATE_KVM_VCPU(r13)
        b       hdec_soon
 
 /*
@@ -376,6 +381,12 @@ kvmppc_hv_entry:
        li      r6, KVM_GUEST_MODE_HOST_HV
        stb     r6, HSTATE_IN_GUEST(r13)
 
+       /* Store initial timestamp */
+       cmpdi   r4, 0
+       beq     1f
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+1:
        /* Clear out SLB */
        li      r6,0
        slbmte  r6,r6
@@ -880,6 +891,10 @@ fast_guest_return:
        li      r9, KVM_GUEST_MODE_GUEST_HV
        stb     r9, HSTATE_IN_GUEST(r13)
 
+       /* Accumulate timing */
+       addi    r3, r4, VCPU_TB_GUEST
+       bl      kvmhv_accumulate_time
+
        /* Enter guest */
 
 BEGIN_FTR_SECTION
@@ -917,6 +932,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        hrfid
        b       .
 
+secondary_too_late:
+       cmpdi   r4, 0
+       beq     11f
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+11:    b       kvmhv_switch_to_host
+
+hdec_soon:
+       cmpdi   r4, 0
+       beq     12f
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+12:    b       kvmhv_do_exit
+
 /******************************************************************************
  *                                                                            *
  *                               Exit code                                    *
@@ -1002,6 +1031,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        stw     r12,VCPU_TRAP(r9)
 
+       addi    r3, r9, VCPU_TB_RMINTR
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+       ld      r5, VCPU_GPR(R5)(r9)
+       ld      r6, VCPU_GPR(R6)(r9)
+       ld      r7, VCPU_GPR(R7)(r9)
+       ld      r8, VCPU_GPR(R8)(r9)
+
        /* Save HEIR (HV emulation assist reg) in emul_inst
           if this is an HEI (HV emulation interrupt, e40) */
        li      r3,KVM_INST_FETCH_FAILED
@@ -1073,6 +1110,9 @@ guest_exit_cont:          /* r9 = vcpu, r12 = trap, r13 = 
paca */
        cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
        beq     machine_check_realmode
 mc_cont:
+       addi    r3, r9, VCPU_TB_RMEXIT
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
 
        /* Save guest CTRL register, set runlatch to 1 */
 6:     mfspr   r6,SPRN_CTRLF
@@ -1417,7 +1457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        slbia
        ptesync
 
-hdec_soon:                     /* r12 = trap, r13 = paca */
+kvmhv_do_exit:                 /* r12 = trap, r13 = paca */
        /*
         * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
@@ -1476,7 +1516,7 @@ hdec_soon:                        /* r12 = trap, r13 = 
paca */
        addi    r6,r6,PACA_SIZE
        bne     42b
 
-secondary_too_late:
+kvmhv_switch_to_host:
        /* Secondary threads wait for primary to do partition switch */
 43:    ld      r5,HSTATE_KVM_VCORE(r13)
        ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
@@ -1562,6 +1602,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 1:     addi    r8,r8,16
        .endr
 
+       /* Finish timing, if we have a vcpu */
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       li      r3, 0
+       beq     2f
+       bl      kvmhv_accumulate_time
+2:
        /* Unset guest mode */
        li      r0, KVM_GUEST_MODE_NONE
        stb     r0, HSTATE_IN_GUEST(r13)
@@ -2069,6 +2116,10 @@ _GLOBAL(kvmppc_h_cede)
        /* save FP state */
        bl      kvmppc_save_fp
 
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       addi    r3, r4, VCPU_TB_CEDE
+       bl      kvmhv_accumulate_time
+
        /*
         * Take a nap until a decrementer or external or doobell interrupt
         * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
@@ -2109,6 +2160,9 @@ kvm_end_cede:
        /* Woken by external or decrementer interrupt */
        ld      r1, HSTATE_HOST_R1(r13)
 
+       addi    r3, r4, VCPU_TB_RMINTR
+       bl      kvmhv_accumulate_time
+
        /* load up FP state */
        bl      kvmppc_load_fp
 
@@ -2429,3 +2483,48 @@ kvmppc_fix_pmao:
        mtspr   SPRN_PMC6, r3
        isync
        blr
+
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+       mftb    r5
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       std     r5, VCPU_ACTIVITY_START(r4)
+       blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+       ld      r5, VCPU_CUR_ACTIVITY(r4)
+       ld      r6, VCPU_ACTIVITY_START(r4)
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       mftb    r7
+       std     r7, VCPU_ACTIVITY_START(r4)
+       cmpdi   r5, 0
+       beqlr
+       subf    r3, r6, r7
+       ld      r8, TAS_SEQCOUNT(r5)
+       cmpdi   r8, 0
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       lwsync
+       ld      r7, TAS_TOTAL(r5)
+       add     r7, r7, r3
+       std     r7, TAS_TOTAL(r5)
+       ld      r6, TAS_MIN(r5)
+       ld      r7, TAS_MAX(r5)
+       beq     3f
+       cmpd    r3, r6
+       bge     1f
+3:     std     r3, TAS_MIN(r5)
+1:     cmpd    r3, r7
+       ble     2f
+       std     r3, TAS_MAX(r5)
+2:     lwsync
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       blr
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to