On 20.03.15 10:39, Paul Mackerras wrote:
> This reads the timebase at various points in the real-mode guest
> entry/exit code and uses that to accumulate total, minimum and
> maximum time spent in those parts of the code.  Currently these
> times are accumulated per vcpu in 5 parts of the code:
> 
> * rm_entry - time taken from the start of kvmppc_hv_entry() until
>   just before entering the guest.
> * rm_intr - time from when we take a hypervisor interrupt in the
>   guest until we either re-enter the guest or decide to exit to the
>   host.  This includes time spent handling hcalls in real mode.
> * rm_exit - time from when we decide to exit the guest until the
>   return from kvmppc_hv_entry().
> * guest - time spend in the guest
> * cede - time spent napping in real mode due to an H_CEDE hcall
>   while other threads in the same vcore are active.
> 
> These times are exposed in debugfs in a directory per vcpu that
> contains a file called "timings".  This file contains one line for
> each of the 5 timings above, with the name followed by a colon and
> 4 numbers, which are the count (number of times the code has been
> executed), the total time, the minimum time, and the maximum time,
> all in nanoseconds.
> 
> Signed-off-by: Paul Mackerras <pau...@samba.org>

Have you measure the additional overhead this brings?

> ---
>  arch/powerpc/include/asm/kvm_host.h     |  19 +++++
>  arch/powerpc/include/asm/time.h         |   3 +
>  arch/powerpc/kernel/asm-offsets.c       |  11 +++
>  arch/powerpc/kernel/time.c              |   6 ++
>  arch/powerpc/kvm/book3s_hv.c            | 135 
> ++++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 105 ++++++++++++++++++++++++-
>  6 files changed, 276 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index f1d0bbc..286c0ce 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -369,6 +369,14 @@ struct kvmppc_slb {
>       u8 base_page_size;      /* MMU_PAGE_xxx */
>  };
>  
> +/* Struct used to accumulate timing information in HV real mode code */
> +struct kvmhv_tb_accumulator {
> +     u64     seqcount;       /* used to synchronize access, also count * 2 */
> +     u64     tb_total;       /* total time in timebase ticks */
> +     u64     tb_min;         /* min time */
> +     u64     tb_max;         /* max time */
> +};
> +
>  # ifdef CONFIG_PPC_FSL_BOOK3E
>  #define KVMPPC_BOOKE_IAC_NUM 2
>  #define KVMPPC_BOOKE_DAC_NUM 2
> @@ -656,6 +664,17 @@ struct kvm_vcpu_arch {
>       u64 busy_preempt;
>  
>       u32 emul_inst;
> +
> +     struct kvmhv_tb_accumulator *cur_activity;      /* What we're timing */
> +     u64     cur_tb_start;                   /* when it started */
> +     struct kvmhv_tb_accumulator rm_entry;   /* real-mode entry code */
> +     struct kvmhv_tb_accumulator rm_intr;    /* real-mode intr handling */
> +     struct kvmhv_tb_accumulator rm_exit;    /* real-mode exit code */
> +     struct kvmhv_tb_accumulator guest_time; /* guest execution */
> +     struct kvmhv_tb_accumulator cede_time;  /* time napping inside guest */
> +
> +     struct dentry *debugfs_dir;
> +     struct dentry *debugfs_timings;
>  #endif
>  };
>  
> diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
> index 03cbada..10fc784 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
>  
>  DECLARE_PER_CPU(u64, decrementers_next_tb);
>  
> +/* Convert timebase ticks to nanoseconds */
> +unsigned long long tb_to_ns(unsigned long long tb_ticks);
> +
>  #endif /* __KERNEL__ */
>  #endif /* __POWERPC_TIME_H */
> diff --git a/arch/powerpc/kernel/asm-offsets.c 
> b/arch/powerpc/kernel/asm-offsets.c
> index 4717859..ec9f59c 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -458,6 +458,17 @@ int main(void)
>       DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
>       DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
>       DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
> +     DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
> +     DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
> +     DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
> +     DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
> +     DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
> +     DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
> +     DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, 
> arch.cur_tb_start));
> +     DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
> +     DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
> +     DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
> +     DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
>  #endif
>       DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
>       DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 2d7b33f..56f4484 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
>  }
>  #endif
>  
> +unsigned long long tb_to_ns(unsigned long long ticks)
> +{
> +     return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
> +}
> +EXPORT_SYMBOL_GPL(tb_to_ns);
> +
>  /*
>   * Scheduler clock - returns current time in nanosec units.
>   *
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6631b53..8517c33 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1411,6 +1411,127 @@ static struct kvmppc_vcore 
> *kvmppc_vcore_create(struct kvm *kvm, int core)
>       return vcore;
>  }
>  
> +static struct debugfs_timings_element {
> +     const char *name;
> +     size_t offset;
> +} timings[] = {
> +     {"rm_entry",    offsetof(struct kvm_vcpu, arch.rm_entry)},
> +     {"rm_intr",     offsetof(struct kvm_vcpu, arch.rm_intr)},
> +     {"rm_exit",     offsetof(struct kvm_vcpu, arch.rm_exit)},
> +     {"guest",       offsetof(struct kvm_vcpu, arch.guest_time)},
> +     {"cede",        offsetof(struct kvm_vcpu, arch.cede_time)},
> +};
> +
> +#define N_TIMINGS    (sizeof(timings) / sizeof(timings[0]))
> +
> +struct debugfs_timings_state {
> +     struct kvm_vcpu *vcpu;
> +     unsigned int    buflen;
> +     char            buf[N_TIMINGS * 100];
> +};
> +
> +static int debugfs_timings_open(struct inode *inode, struct file *file)
> +{
> +     struct kvm_vcpu *vcpu = inode->i_private;
> +     struct debugfs_timings_state *p;
> +
> +     p = kzalloc(sizeof(*p), GFP_KERNEL);
> +     if (!p)
> +             return -ENOMEM;
> +
> +     kvm_get_kvm(vcpu->kvm);
> +     p->vcpu = vcpu;
> +     file->private_data = p;
> +
> +     return nonseekable_open(inode, file);
> +}
> +
> +static int debugfs_timings_release(struct inode *inode, struct file *file)
> +{
> +     struct debugfs_timings_state *p = file->private_data;
> +
> +     kvm_put_kvm(p->vcpu->kvm);
> +     kfree(p);
> +     return 0;
> +}
> +
> +static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
> +                                 size_t len, loff_t *ppos)
> +{
> +     struct debugfs_timings_state *p = file->private_data;
> +     struct kvm_vcpu *vcpu = p->vcpu;
> +     char *s;
> +     struct kvmhv_tb_accumulator tb;
> +     u64 count;
> +     loff_t pos;
> +     ssize_t n;
> +     int i, loops;
> +     bool ok;
> +
> +     if (!p->buflen) {
> +             s = p->buf;
> +             for (i = 0; i < N_TIMINGS; ++i) {
> +                     struct kvmhv_tb_accumulator *acc;
> +
> +                     acc = (struct kvmhv_tb_accumulator *)
> +                             ((unsigned long)vcpu + timings[i].offset);
> +                     ok = false;
> +                     for (loops = 0; loops < 1000; ++loops) {
> +                             count = acc->seqcount;
> +                             if (!(count & 1)) {
> +                                     smp_rmb();
> +                                     tb = *acc;
> +                                     smp_rmb();
> +                                     if (count == acc->seqcount) {
> +                                             ok = true;
> +                                             break;
> +                                     }
> +                             }
> +                             udelay(1);
> +                     }
> +                     if (!ok)
> +                             sprintf(s, "%s: stuck\n", timings[i].name);
> +                     else
> +                             sprintf(s, "%s: %llu %llu %llu %llu\n",
> +                                     timings[i].name, count / 2,
> +                                     tb_to_ns(tb.tb_total),
> +                                     tb_to_ns(tb.tb_min),
> +                                     tb_to_ns(tb.tb_max));
> +                     s += strlen(s);
> +             }
> +             p->buflen = s - p->buf;
> +     }
> +
> +     pos = *ppos;
> +     if (pos >= p->buflen)
> +             return 0;
> +     if (len > p->buflen - pos)
> +             len = p->buflen - pos;
> +     n = copy_to_user(buf, p->buf + pos, len);
> +     if (n) {
> +             if (n == len)
> +                     return -EFAULT;
> +             len -= n;
> +     }
> +     *ppos = pos + len;
> +     return len;
> +}
> +
> +static ssize_t debugfs_timings_write(struct file *file, const char __user 
> *buf,
> +                                  size_t len, loff_t *ppos)
> +{
> +     return -EACCES;
> +}
> +
> +static const struct file_operations debugfs_timings_ops = {
> +     .owner   = THIS_MODULE,
> +     .open    = debugfs_timings_open,
> +     .release = debugfs_timings_release,
> +     .read    = debugfs_timings_read,
> +     .write   = debugfs_timings_write,
> +     .llseek  = generic_file_llseek,
> +};
> +
>  static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
>                                                  unsigned int id)
>  {
> @@ -1418,6 +1539,7 @@ static struct kvm_vcpu 
> *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
>       int err = -EINVAL;
>       int core;
>       struct kvmppc_vcore *vcore;
> +     char buf[16];
>  
>       core = id / threads_per_subcore;
>       if (core >= KVM_MAX_VCORES)
> @@ -1480,6 +1602,19 @@ static struct kvm_vcpu 
> *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
>       vcpu->arch.cpu_type = KVM_CPU_3S_64;
>       kvmppc_sanity_check(vcpu);
>  
> +     /* Create a debugfs directory for the vcpu */
> +     sprintf(buf, "vcpu%u", id);

Please make sure to use snprintf and friends rather than assume that you
happened to choose the buffer correctly sized.


Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to