On 07/19/2016 10:12 AM, Suraj Jitindar Singh wrote:
> This patch introduces new halt polling functionality into the kvm_hv kernel
> module. When a vcore is idle it will poll for some period of time before
> scheduling itself out.

Some wording on why you cannot use the common code might be useful. 
> 
> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
> idle) we schedule ourselves out to allow something else to run. In the
> event that we need to wake up very quickly (for example an interrupt
> arrives), we are required to wait until we get scheduled again.
> 
> Implement halt polling so that when a vcore is idle, and before scheduling
> ourselves out, we poll for vcpus in the runnable_threads list which have
> pending exceptions or which leave the ceded state. If we poll successfully
> then we can get back into the guest very quickly without ever scheduling
> ourselves, otherwise we schedule ourselves out as before.
> 
> Testing of this patch with a TCP round robin test between two guests with
> virtio network interfaces has found a decrease in round trip time of ~15us
> on average. A performance gain is only seen when going out of and
> back into the guest often and quickly; otherwise there is no net benefit
> from the polling. The polling interval is adjusted such that when we are
> often scheduled out for long periods of time it is reduced, and when we
> often poll successfully it is increased. The rate at which the polling
> interval increases or decreases, and the maximum polling interval, can
> be set through module parameters.
> 
> Based on the implementation in the generic kvm module by Wanpeng Li and
> Paolo Bonzini, and on direction from Paul Mackerras.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsi...@gmail.com>
> ---
>  arch/powerpc/include/asm/kvm_book3s.h |   1 +
>  arch/powerpc/include/asm/kvm_host.h   |   1 +
>  arch/powerpc/kvm/book3s_hv.c          | 116 
> ++++++++++++++++++++++++++++++----
>  arch/powerpc/kvm/trace_hv.h           |  22 +++++++
>  4 files changed, 126 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index 151f817..c261f52 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
>       ulong pcr;
>       ulong dpdes;            /* doorbell state (POWER8) */
>       ulong conferring_threads;
> +     unsigned int halt_poll_ns;
>  };
> 
>  struct kvmppc_vcpu_book3s {
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 02d06e9..610f393 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ struct kvm_arch {
>  #define VCORE_SLEEPING       3
>  #define VCORE_RUNNING        4
>  #define VCORE_EXITING        5
> +#define VCORE_POLLING        6
> 
>  /*
>   * Struct used to manage memory for a virtual processor area
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 3bcf9e6..a9de1d4 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, 
> &h_ipi_redirect,
>  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host 
> core");
>  #endif
> 
> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
> +
> +/* Factor by which the vcore halt poll interval is grown, default is to 
> double
> + */
> +static unsigned int halt_poll_ns_grow = 2;
> +module_param(halt_poll_ns_grow, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
> +
> +/* Factor by which the vcore halt poll interval is shrunk, default is to 
> reset
> + */
> +static unsigned int halt_poll_ns_shrink;
> +module_param(halt_poll_ns_shrink, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
> +
>  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
> 
> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore 
> *vc,
>       finish_wait(&vcpu->arch.cpu_run, &wait);
>  }
> 
> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> +     /* 10us base */
> +     if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
> +             vc->halt_poll_ns = 10000;
> +     else
> +             vc->halt_poll_ns *= halt_poll_ns_grow;
> +
> +     if (vc->halt_poll_ns > halt_poll_max_ns)
> +             vc->halt_poll_ns = halt_poll_max_ns;
> +}
> +
> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> +     if (halt_poll_ns_shrink == 0)
> +             vc->halt_poll_ns = 0;
> +     else
> +             vc->halt_poll_ns /= halt_poll_ns_shrink;
> +}
> +
> +/* Check to see if any of the runnable vcpus on the vcore have pending
> + * exceptions or are no longer ceded
> + */
> +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
> +{
> +     struct kvm_vcpu *vcpu;
> +     int i;
> +
> +     for_each_runnable_thread(i, vcpu, vc) {
> +             if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
> +                     return 1;
> +     }
> +
> +     return 0;
> +}
> +
>  /*
>   * All the vcpus in this vcore are idle, so wait for a decrementer
>   * or external interrupt to one of the vcpus.  vc->lock is held.
>   */
>  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  {
> -     struct kvm_vcpu *vcpu;
> -     int do_sleep = 1, i;
> +     int do_sleep = 1;
> +     ktime_t cur, start;
> +     u64 block_ns;
>       DECLARE_SWAITQUEUE(wait);
> 
> -     prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> +     /* Poll for pending exceptions and ceded state */
> +     cur = start = ktime_get();
> +     if (vc->halt_poll_ns) {
> +             ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
> 
> -     /*
> -      * Check one last time for pending exceptions and ceded state after
> -      * we put ourselves on the wait queue
> -      */
> -     for_each_runnable_thread(i, vcpu, vc) {
> -             if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
> -                     do_sleep = 0;
> -                     break;
> -             }
> +             vc->vcore_state = VCORE_POLLING;
> +             spin_unlock(&vc->lock);
> +
> +             do {
> +                     if (kvmppc_vcore_check_block(vc)) {
> +                             do_sleep = 0;
> +                             break;
> +                     }
> +                     cur = ktime_get();
> +             } while (ktime_before(cur, stop));
> +
> +             spin_lock(&vc->lock);
> +             vc->vcore_state = VCORE_INACTIVE;
> +
> +             if (!do_sleep)
> +                     goto out;
>       }
> 
> -     if (!do_sleep) {
> +     prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> +
> +     if (kvmppc_vcore_check_block(vc)) {
>               finish_swait(&vc->wq, &wait);
> -             return;
> +             do_sleep = 0;
> +             goto out;
>       }
> 
>       vc->vcore_state = VCORE_SLEEPING;
> @@ -2656,6 +2723,27 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
> *vc)
>       spin_lock(&vc->lock);
>       vc->vcore_state = VCORE_INACTIVE;
>       trace_kvmppc_vcore_blocked(vc, 1);
> +
> +     cur = ktime_get();
> +
> +out:
> +     block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
> +
> +     /* Adjust poll time */
> +     if (halt_poll_max_ns) {
> +             if (block_ns <= vc->halt_poll_ns)
> +                     ;
> +             /* We slept and blocked for longer than the max halt time */
> +             else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
> +                     shrink_halt_poll_ns(vc);
> +             /* We slept and our poll time is too small */
> +             else if (vc->halt_poll_ns < halt_poll_max_ns &&
> +                             block_ns < halt_poll_max_ns)
> +                     grow_halt_poll_ns(vc);
> +     } else
> +             vc->halt_poll_ns = 0;
> +
> +     trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
>  }
> 
>  static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
> index 33d9daf..fb21990 100644
> --- a/arch/powerpc/kvm/trace_hv.h
> +++ b/arch/powerpc/kvm/trace_hv.h
> @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
>                  __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
>  );
> 
> +TRACE_EVENT(kvmppc_vcore_wakeup,
> +     TP_PROTO(int do_sleep, __u64 ns),
> +
> +     TP_ARGS(do_sleep, ns),
> +
> +     TP_STRUCT__entry(
> +             __field(__u64,  ns)
> +             __field(int,    waited)
> +             __field(pid_t,  tgid)
> +     ),
> +
> +     TP_fast_assign(
> +             __entry->ns     = ns;
> +             __entry->waited = do_sleep;
> +             __entry->tgid   = current->tgid;
> +     ),
> +
> +     TP_printk("%s time %lld ns, tgid=%d",
> +             __entry->waited ? "wait" : "poll",
> +             __entry->ns, __entry->tgid)
> +);
> +
>  TRACE_EVENT(kvmppc_run_vcpu_enter,
>       TP_PROTO(struct kvm_vcpu *vcpu),
> 

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to