[PATCH v3 0/3] KVM: Set vcpu->preempted/ready iff scheduled out while running

2024-05-03 Thread David Matlack
This series changes KVM to mark a vCPU as preempted/ready if-and-only-if
it's scheduled out while running, i.e. to not mark a vCPU
preempted/ready if it's scheduled out during a non-KVM_RUN ioctl() or
when userspace is doing KVM_RUN with immediate_exit=true.

This is a logical extension of commit 54aa83c90198 ("KVM: x86: do not
set st->preempted when going back to user space"), which stopped
marking a vCPU as preempted when returning to userspace. But if
userspace invokes a KVM vCPU ioctl() that gets preempted, the vCPU
will still be marked preempted/ready. This is arguably incorrect
behavior, since the vCPU was not actually preempted while the guest
was running; it was preempted while doing something on behalf of
userspace.

In practice, this avoids KVM dirtying guest memory via the steal time
page after userspace has paused vCPUs, e.g. for Live Migration, which
allows userspace to collect the final dirty bitmap before or in parallel
with saving vCPU state without having to worry about saving vCPU state
triggering writes to guest memory.

Patch 1 introduces vcpu->wants_to_run to allow KVM to detect when a vCPU
is in its core run loop.

Patch 2 renames immediate_exit to immediate_exit__unsafe within KVM to
ensure that any new references get extra scrutiny.

Patch 3 leverages vcpu->wants_to_run to constrain when
vcpu->preempted and vcpu->ready are set.

v3:
 - Use READ_ONCE() to read immediate_exit [Sean]
 - Replace use of immediate_exit with !wants_to_run to avoid TOCTOU [Sean]
 - Hide/Rename immediate_exit in KVM to harden against TOCTOU bugs [Sean]

v2: https://lore.kernel.org/kvm/20240307163541.92138-1-dmatl...@google.com/
 - Drop Google-specific "PRODKERNEL: " shortlog prefix [me]

v1: https://lore.kernel.org/kvm/20231218185850.1659570-1-dmatl...@google.com/

David Matlack (3):
  KVM: Introduce vcpu->wants_to_run
  KVM: Ensure new code that references immediate_exit gets extra
scrutiny
  KVM: Mark a vCPU as preempted/ready iff it's scheduled out while
running

 arch/arm64/kvm/arm.c   |  2 +-
 arch/loongarch/kvm/vcpu.c  |  2 +-
 arch/mips/kvm/mips.c   |  2 +-
 arch/powerpc/kvm/powerpc.c |  2 +-
 arch/riscv/kvm/vcpu.c  |  2 +-
 arch/s390/kvm/kvm-s390.c   |  2 +-
 arch/x86/kvm/x86.c |  4 ++--
 include/linux/kvm_host.h   |  1 +
 include/uapi/linux/kvm.h   | 15 ++-
 virt/kvm/kvm_main.c|  5 -
 10 files changed, 27 insertions(+), 10 deletions(-)


base-commit: 296655d9bf272cfdd9d2211d099bcb8a61b93037
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog



[PATCH v3 1/3] KVM: Introduce vcpu->wants_to_run

2024-05-03 Thread David Matlack
Introduce vcpu->wants_to_run to indicate when a vCPU is in its core run
loop, i.e. when the vCPU is running the KVM_RUN ioctl and immediate_exit
was not set.

Replace all references to vcpu->run->immediate_exit with
!vcpu->wants_to_run to avoid TOCTOU races with userspace. For example,
a malicious userspace could invoke KVM_RUN with immediate_exit=true
and then, after KVM reads it to set wants_to_run=false, flip it to
false. This would result in the vCPU running in KVM_RUN with
wants_to_run=false. This wouldn't cause any real bugs today but is a
dangerous landmine.
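
For illustration, a minimal sketch of the snapshot pattern this
establishes, mirroring the virt/kvm/kvm_main.c change in this patch
(not a new API):

  /*
   * Read the userspace-mapped field exactly once, using READ_ONCE()
   * so the compiler cannot re-load it, then consult only the
   * kernel-private copy for the rest of KVM_RUN.
   */
  vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit);
  r = kvm_arch_vcpu_ioctl_run(vcpu);  /* checks vcpu->wants_to_run only */
  vcpu->wants_to_run = false;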

Signed-off-by: David Matlack 
---
 arch/arm64/kvm/arm.c   | 2 +-
 arch/loongarch/kvm/vcpu.c  | 2 +-
 arch/mips/kvm/mips.c   | 2 +-
 arch/powerpc/kvm/powerpc.c | 2 +-
 arch/riscv/kvm/vcpu.c  | 2 +-
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c | 4 ++--
 include/linux/kvm_host.h   | 1 +
 virt/kvm/kvm_main.c| 3 +++
 9 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c4a0a35e02c7..c587e5d9396e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -986,7 +986,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
vcpu_load(vcpu);
 
-   if (run->immediate_exit) {
+   if (!vcpu->wants_to_run) {
ret = -EINTR;
goto out;
}
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 3a8779065f73..847ef54f3a84 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -1163,7 +1163,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_complete_iocsr_read(vcpu, run);
}
 
-   if (run->immediate_exit)
+   if (!vcpu->wants_to_run)
return r;
 
/* Clear exit_reason */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 231ac052b506..f1a99962027a 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -436,7 +436,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu->mmio_needed = 0;
}
 
-   if (vcpu->run->immediate_exit)
+   if (!vcpu->wants_to_run)
goto out;
 
lose_fpu(1);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d32abe7fe6ab..961aadc71de2 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1852,7 +1852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
kvm_sigset_activate(vcpu);
 
-   if (run->immediate_exit)
+   if (!vcpu->wants_to_run)
r = -EINTR;
else
r = kvmppc_vcpu_run(vcpu);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b5ca9f2e98ac..3d8349470ee6 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -711,7 +711,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
}
 
-   if (run->immediate_exit) {
+   if (!vcpu->wants_to_run) {
kvm_vcpu_srcu_read_unlock(vcpu);
return -EINTR;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5147b943a864..b1ea25aacbf9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5033,7 +5033,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.pv.dumping)
return -EINVAL;
 
-   if (kvm_run->immediate_exit)
+   if (!vcpu->wants_to_run)
return -EINTR;
 
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d2619d3eee4..f70ae1558684 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11396,7 +11396,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
kvm_vcpu_srcu_read_lock(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-   if (kvm_run->immediate_exit) {
+   if (!vcpu->wants_to_run) {
r = -EINTR;
goto out;
}
@@ -11474,7 +11474,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(vcpu->mmio_needed);
}
 
-   if (kvm_run->immediate_exit) {
+   if (!vcpu->wants_to_run) {
r = -EINTR;
goto out;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index afbc99264ffa..f9b9ce0c3cd9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -380,6 +380,7 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
 #endif
+   bool wants_to_run;
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 38b498669ef9..bdea5b978f80 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4425,7 +4425,10 @@ static long kvm_vcpu_ioctl(struct file *filp,

[PATCH v3 2/3] KVM: Ensure new code that references immediate_exit gets extra scrutiny

2024-05-03 Thread David Matlack
Ensure that any new KVM code that references immediate_exit gets extra
scrutiny by renaming it to immediate_exit__unsafe in kernel code.

All fields in struct kvm_run are subject to TOCTOU races since they are
mapped into userspace, which may be malicious or buggy. To protect KVM,
this commit introduces a new macro that appends __unsafe to field names
in struct kvm_run, hinting to developers and reviewers that accessing
this field must be done carefully.

Apply the new macro to immediate_exit, since userspace can make
immediate_exit inconsistent with vcpu->wants_to_run, i.e. accessing
immediate_exit directly could lead to unexpected bugs in the future.
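
For illustration, a hedged sketch of the macro's effect; the struct
name kvm_run_example is hypothetical, everything else mirrors the diff
below:

  #ifdef __KERNEL__
  #define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
  #else
  #define HINT_UNSAFE_IN_KVM(_symbol) _symbol
  #endif

  struct kvm_run_example {
          __u8 request_interrupt_window;
          __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
          __u8 padding1[6];
  };

  /*
   * Userspace sees and writes run->immediate_exit; kernel code must
   * spell out run->immediate_exit__unsafe. Only the name changes, so
   * the struct layout and ABI are unaffected, but every in-kernel
   * access is easy to grep for and flag in review.
   */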

Signed-off-by: David Matlack 
---
 include/uapi/linux/kvm.h | 15 ++-
 virt/kvm/kvm_main.c  |  2 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2190adbe3002..3611ad3b9c2a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
 /* Flags that describe what fields in emulation_failure hold valid data. */
 #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
 
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
/* in */
__u8 request_interrupt_window;
-   __u8 immediate_exit;
+   __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
 
/* out */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bdea5b978f80..2b29851a90bd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4425,7 +4425,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
synchronize_rcu();
put_pid(oldpid);
}
-   vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit);
+   vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe);
r = kvm_arch_vcpu_ioctl_run(vcpu);
vcpu->wants_to_run = false;
 
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog



[PATCH v3 3/3] KVM: Mark a vCPU as preempted/ready iff it's scheduled out while running

2024-05-03 Thread David Matlack
Mark a vCPU as preempted/ready if-and-only-if it's scheduled out while
running, i.e. do not mark a vCPU preempted/ready if it's scheduled out
during a non-KVM_RUN ioctl() or when userspace is doing KVM_RUN with
immediate_exit.

Commit 54aa83c90198 ("KVM: x86: do not set st->preempted when going back
to user space") stopped marking a vCPU as preempted when returning to
userspace, but if userspace then invokes a KVM vCPU ioctl() that gets
preempted, the vCPU will be marked preempted/ready. This is arguably
incorrect behavior, since the vCPU was not actually preempted while
the guest was running; it was preempted while doing something on
behalf of userspace.

In practice, this also avoids KVM dirtying guest memory after userspace has
paused vCPUs, e.g. for Live Migration, which allows userspace to collect
the final dirty bitmap before or in parallel with saving vCPU state
without having to worry about saving vCPU state triggering writes to
guest memory.
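
To make the userspace benefit concrete, a hedged sketch of the
VMM-side pause-and-save flow this enables (standard KVM UAPI ioctls;
the fds, variables, and pause mechanism are illustrative assumptions):

  /* 1. Kick all vCPUs out of KVM_RUN, e.g. via a signal or by
   *    setting immediate_exit, and stop re-entering the guest. */

  /* 2. Collect the final dirty bitmap; nothing dirties guest
   *    memory from here on. */
  ioctl(vm_fd, KVM_GET_DIRTY_LOG, &dirty_log);  /* struct kvm_dirty_log */

  /* 3. Save vCPU state, before or in parallel with step 2. Getting
   *    preempted inside these ioctls no longer marks the vCPU
   *    preempted, so KVM won't write the steal-time page. */
  ioctl(vcpu_fd, KVM_GET_REGS, &regs);          /* struct kvm_regs */
  ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);        /* struct kvm_sregs */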

Suggested-by: Sean Christopherson 
Signed-off-by: David Matlack 
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2b29851a90bd..3973e62acc7c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6302,7 +6302,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-   if (current->on_rq) {
+   if (current->on_rq && vcpu->wants_to_run) {
WRITE_ONCE(vcpu->preempted, true);
WRITE_ONCE(vcpu->ready, true);
}
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog



Re: [PATCH v13 16/35] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory

2023-11-02 Thread David Matlack
On Thu, Nov 2, 2023 at 9:03 AM Sean Christopherson  wrote:
>
> On Thu, Nov 02, 2023, Paolo Bonzini wrote:
> > On 10/31/23 23:39, David Matlack wrote:
> > > > > Maybe can you sketch out how you see this proposal being extensible to
> > > > > using guest_memfd for shared mappings?
> > > > For in-place conversions, e.g. pKVM, no additional guest_memfd is 
> > > > needed.  What's
> > > > missing there is the ability to (safely) mmap() guest_memfd, e.g. KVM 
> > > > needs to
> > > > ensure there are no outstanding references when converting back to 
> > > > private.
> > > >
> > > > For TDX/SNP, assuming we don't find a performant and robust way to do 
> > > > in-place
> > > > conversions, a second fd+offset pair would be needed.
> > > Is there a way to support non-in-place conversions within a single 
> > > guest_memfd?
> >
> > For TDX/SNP, you could have a hook from KVM_SET_MEMORY_ATTRIBUTES to guest
> > memory.  The hook would invalidate now-private parts if they have a VMA,
> > causing a SIGSEGV/EFAULT if the host touches them.
> >
> > It would forbid mappings from multiple gfns to a single offset of the
> > guest_memfd, because then the shared vs. private attribute would be tied to
> > the offset.  This should not be a problem; for example, in the case of SNP,
> > the RMP already requires a single mapping from host physical address to
> > guest physical address.
>
> I don't see how this can work.  It's not a M:1 scenario (where M is multiple 
> gfns),
> it's a 1:N scenario (where N is multiple offsets).  The *gfn* doesn't change 
> on
> a conversion, what needs to change to do non-in-place conversion is the pfn, 
> which
> is effectively the guest_memfd+offset pair.
>
> So yes, we *could* support non-in-place conversions within a single 
> guest_memfd,
> but it would require a second offset,

Why can't KVM free the existing page at guest_memfd+offset and
allocate a new one when doing non-in-place conversions?

> at which point it makes sense to add a
> second file descriptor as well.  Userspace could still use a single 
> guest_memfd
> instance, i.e. pass in the same file descriptor but different offsets.


Re: [PATCH v13 16/35] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory

2023-10-31 Thread David Matlack
On Tue, Oct 31, 2023 at 2:36 PM Sean Christopherson  wrote:
> On Tue, Oct 31, 2023, David Matlack wrote:
> > On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> > > Introduce an ioctl(), KVM_CREATE_GUEST_MEMFD, to allow creating file-based
> > > memory that is tied to a specific KVM virtual machine and whose primary
> > > purpose is to serve guest memory.
>
> > Maybe can you sketch out how you see this proposal being extensible to
> > using guest_memfd for shared mappings?
>
> For in-place conversions, e.g. pKVM, no additional guest_memfd is needed.  
> What's
> missing there is the ability to (safely) mmap() guest_memfd, e.g. KVM needs to
> ensure there are no outstanding references when converting back to private.
>
> For TDX/SNP, assuming we don't find a performant and robust way to do in-place
> conversions, a second fd+offset pair would be needed.

Is there a way to support non-in-place conversions within a single guest_memfd?


Re: [PATCH v13 16/35] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory

2023-10-31 Thread David Matlack
On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> Introduce an ioctl(), KVM_CREATE_GUEST_MEMFD, to allow creating file-based
> memory that is tied to a specific KVM virtual machine and whose primary
> purpose is to serve guest memory.
> 
> A guest-first memory subsystem allows for optimizations and enhancements
> that are kludgy or outright infeasible to implement/support in a generic
> memory subsystem.  With guest_memfd, guest protections and mapping sizes
> are fully decoupled from host userspace mappings.   E.g. KVM currently
> doesn't support mapping memory as writable in the guest without it also
> being writable in host userspace, as KVM's ABI uses VMA protections to
> define the allowed guest protections.  Userspace can fudge this by
> establishing two mappings, a writable mapping for the guest and readable
> one for itself, but that’s suboptimal on multiple fronts.
> 
> Similarly, KVM currently requires the guest mapping size to be a strict
> subset of the host userspace mapping size, e.g. KVM doesn’t support
> creating a 1GiB guest mapping unless userspace also has a 1GiB guest
> mapping.  Decoupling the mappings sizes would allow userspace to precisely
> map only what is needed without impacting guest performance, e.g. to
> harden against unintentional accesses to guest memory.
> 
> Decoupling guest and userspace mappings may also allow for a cleaner
> alternative to high-granularity mappings for HugeTLB, which has reached a
> bit of an impasse and is unlikely to ever be merged.
> 
> A guest-first memory subsystem also provides clearer line of sight to
> things like a dedicated memory pool (for slice-of-hardware VMs) and
> elimination of "struct page" (for offload setups where userspace _never_
> needs to mmap() guest memory).

All of these use-cases involve using guest_memfd for shared pages, but
this entire series sets up KVM to only use guest_memfd for private
pages.

For example, the per-page attributes are a property of a KVM VM, not the
underlying guest_memfd. So that implies we will need separate
guest_memfds for private and shared pages. But a given memslot can have
a mix of private and shared pages. So that implies a memslot will need
to support 2 guest_memfds? But the UAPI only allows 1 and uses the HVA
for shared mappings.

My initial reaction after reading through this series is that the
per-page private/shared should be a property of the guest_memfd, not the
VM. Maybe it would even be cleaner in the long-run to make all memory
attributes a property of the guest_memfd. That way we can scope the
support to only guest_memfds and not have to worry about making per-page
attributes work with "legacy" HVA-based memslots.

Maybe can you sketch out how you see this proposal being extensible to
using guest_memfd for shared mappings?


Re: [PATCH v13 13/35] KVM: Introduce per-page memory attributes

2023-10-31 Thread David Matlack
On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> From: Chao Peng 
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 89c1a991a3b8..df573229651b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -808,6 +809,9 @@ struct kvm {
>  
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>   struct notifier_block pm_notifier;
> +#endif
> +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
> + struct xarray mem_attr_array;

Please document how access to mem_attr_array is synchronized. If I'm
reading the code correctly I think it's...

  /* Protected by slots_lock (for writes) and RCU (for reads) */
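
For illustration, a hedged sketch of the locking pattern that comment
implies (standard xarray API; gfn, attrs, and entry are illustrative):

  /* Reads: safe under RCU; xa_load() takes the RCU read lock itself. */
  entry = xa_load(&kvm->mem_attr_array, gfn);

  /* Writes: serialized by holding kvm->slots_lock. */
  mutex_lock(&kvm->slots_lock);
  xa_store(&kvm->mem_attr_array, gfn, xa_mk_value(attrs),
           GFP_KERNEL_ACCOUNT);
  mutex_unlock(&kvm->slots_lock);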


Re: [PATCH v13 03/35] KVM: Use gfn instead of hva for mmu_notifier_retry

2023-10-30 Thread David Matlack
On Mon, Oct 30, 2023 at 10:01 AM Paolo Bonzini  wrote:
>
> On Mon, Oct 30, 2023 at 5:53 PM David Matlack  wrote:
> >
> > On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> > > From: Chao Peng 
> > >
> > > Currently in mmu_notifier invalidate path, hva range is recorded and
> > > then checked against by mmu_notifier_retry_hva() in the page fault
> > > handling path. However, for the to be introduced private memory, a page
> >   
> >
> > Is there a missing word here?
>
> No but there could be missing hyphens ("for the to-be-introduced
> private memory"); possibly a "soon" could help parsing and that is
> what you were talking about?

Ah that explains it :)

>
> > >   if (likely(kvm->mmu_invalidate_in_progress == 1)) {
> > > + kvm->mmu_invalidate_range_start = INVALID_GPA;
> > > + kvm->mmu_invalidate_range_end = INVALID_GPA;
> >
> > I don't think this is incorrect, but I was a little suprised to see this
> > here rather than in end() when mmu_invalidate_in_progress decrements to
> > 0.
>
> I think that would be incorrect on the very first start?

Good point. KVM could initialize start/end before registering
notifiers, but that's extra code.


Re: [PATCH v13 03/35] KVM: Use gfn instead of hva for mmu_notifier_retry

2023-10-30 Thread David Matlack
On Mon, Oct 30, 2023 at 9:53 AM David Matlack  wrote:
>
> On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> > From: Chao Peng 
> >
> > +void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
>
> What is the reason to separate range_add() from begin()?

Nevermind, I see how it's needed in kvm_mmu_unmap_gfn_range().


Re: [PATCH v13 03/35] KVM: Use gfn instead of hva for mmu_notifier_retry

2023-10-30 Thread David Matlack
On 2023-10-27 11:21 AM, Sean Christopherson wrote:
> From: Chao Peng 
> 
> Currently in mmu_notifier invalidate path, hva range is recorded and
> then checked against by mmu_notifier_retry_hva() in the page fault
> handling path. However, for the to be introduced private memory, a page
  

Is there a missing word here?

> fault may not have a hva associated, checking gfn(gpa) makes more sense.
> 
> For existing hva based shared memory, gfn is expected to also work. The
> only downside is when aliasing multiple gfns to a single hva, the
> current algorithm of checking multiple ranges could result in a much
> larger range being rejected. Such aliasing should be uncommon, so the
> impact is expected small.
> 
> Suggested-by: Sean Christopherson 
> Cc: Xu Yilun 
> Signed-off-by: Chao Peng 
> Reviewed-by: Fuad Tabba 
> Tested-by: Fuad Tabba 
> [sean: convert vmx_set_apic_access_page_addr() to gfn-based API]
> Signed-off-by: Sean Christopherson 
> ---
>  arch/x86/kvm/mmu/mmu.c   | 10 ++
>  arch/x86/kvm/vmx/vmx.c   | 11 +--
>  include/linux/kvm_host.h | 33 
>  virt/kvm/kvm_main.c  | 41 +++-
>  4 files changed, 64 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index f7901cb4d2fa..d33657d61d80 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3056,7 +3056,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, 
> u64 *sptep)
>   *
>   * There are several ways to safely use this helper:
>   *
> - * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, 
> before
> + * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, 
> before
>   *   consuming it.  In this case, mmu_lock doesn't need to be held during the
>   *   lookup, but it does need to be held while checking the MMU notifier.
>   *
> @@ -4358,7 +4358,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
>   return true;
>  
>   return fault->slot &&
> -mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
> +mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
>  }
>  
>  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> *fault)
> @@ -6245,7 +6245,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t 
> gfn_start, gfn_t gfn_end)
>  
>   write_lock(&kvm->mmu_lock);
>  
> - kvm_mmu_invalidate_begin(kvm, 0, -1ul);
> + kvm_mmu_invalidate_begin(kvm);
> +
> + kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);
>  
>   flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
>  
> @@ -6255,7 +6257,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t 
> gfn_start, gfn_t gfn_end)
>   if (flush)
>   kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - 
> gfn_start);
>  
> - kvm_mmu_invalidate_end(kvm, 0, -1ul);
> + kvm_mmu_invalidate_end(kvm);
>  
>   write_unlock(&kvm->mmu_lock);
>  }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 72e3943f3693..6e502ba93141 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6757,10 +6757,10 @@ static void vmx_set_apic_access_page_addr(struct 
> kvm_vcpu *vcpu)
>   return;
>  
>   /*
> -  * Grab the memslot so that the hva lookup for the mmu_notifier retry
> -  * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
> -  * on the pfn lookup's validation of the memslot to ensure a valid hva
> -  * is used for the retry check.
> +  * Explicitly grab the memslot using KVM's internal slot ID to ensure
> +  * KVM doesn't unintentionally grab a userspace memslot.  It _should_
> +  * be impossible for userspace to create a memslot for the APIC when
> +  * APICv is enabled, but paranoia won't hurt in this case.
>*/
>   slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
>   if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
> @@ -6785,8 +6785,7 @@ static void vmx_set_apic_access_page_addr(struct 
> kvm_vcpu *vcpu)
>   return;
>  
>   read_lock(&vcpu->kvm->mmu_lock);
> - if (mmu_invalidate_retry_hva(kvm, mmu_seq,
> -  gfn_to_hva_memslot(slot, gfn))) {
> + if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
>   kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
>   read_unlock(&vcpu->kvm->mmu_lock);
>   goto out;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index fb6c6109fdca..11d091688346 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -787,8 +787,8 @@ struct kvm {
>   struct mmu_notifier mmu_notifier;
>   unsigned long mmu_invalidate_seq;
>   long mmu_invalidate_in_progress;
> - unsigned long mmu_invalidate_range_start;
> - unsigned long mmu_invalidate_range_end;
> + gfn_t mmu_invalidate_range_start;
> + gfn_t mmu_invalidate_range_end;

Re: [PATCH v2 0/4] KVM: Refactor KVM stats macros and enable custom stat names

2023-05-09 Thread David Matlack
On Mon, Mar 6, 2023 at 11:01 AM David Matlack  wrote:
>
> This series refactors the KVM stats macros to reduce duplication and
> adds the support for choosing custom names for stats.

Hi Paolo,

I just wanted to double-check if this series is on your radar
(probably for 6.5)?


[PATCH v2 4/4] KVM: x86: Drop union for pages_{4k,2m,1g} stats

2023-03-06 Thread David Matlack
Drop the union for the pages_{4k,2m,1g} stats. The union is no longer
necessary now that KVM supports choosing a custom name for stats.

Eliminating the union would also allow future commits to more easily
move pages[] into common code, e.g. if KVM ever gains support for
common page table code.

An alternative would be to drop pages[] and have kvm_update_page_stats()
update pages_{4k,2m,1g} directly. But that's not a good direction to go
in since other architectures use other page sizes.

No functional change intended.

Link: https://lore.kernel.org/kvm/20221208193857.4090582-1-dmatl...@google.com/
Signed-off-by: David Matlack 
---
 arch/x86/include/asm/kvm_host.h | 9 +
 arch/x86/kvm/x86.c  | 6 +++---
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 808c292ad3f4..a59e41355ef4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1473,14 +1473,7 @@ struct kvm_vm_stat {
u64 mmu_recycled;
u64 mmu_cache_miss;
u64 mmu_unsync;
-   union {
-   struct {
-   atomic64_t pages_4k;
-   atomic64_t pages_2m;
-   atomic64_t pages_1g;
-   };
-   atomic64_t pages[KVM_NR_PAGE_SIZES];
-   };
+   atomic64_t pages[KVM_NR_PAGE_SIZES];
u64 nx_lpage_splits;
u64 max_mmu_page_hash_collisions;
u64 max_mmu_rmap_size;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 072f5ba83170..101ad6b7e7b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -240,9 +240,9 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_STAT(VM, CUMULATIVE, NONE, mmu_recycled),
KVM_STAT(VM, CUMULATIVE, NONE, mmu_cache_miss),
KVM_STAT(VM, INSTANT, NONE, mmu_unsync),
-   KVM_STAT(VM, INSTANT, NONE, pages_4k),
-   KVM_STAT(VM, INSTANT, NONE, pages_2m),
-   KVM_STAT(VM, INSTANT, NONE, pages_1g),
+   __KVM_STAT(VM, INSTANT, NONE, pages[PG_LEVEL_4K - 1], "pages_4k"),
+   __KVM_STAT(VM, INSTANT, NONE, pages[PG_LEVEL_2M - 1], "pages_2m"),
+   __KVM_STAT(VM, INSTANT, NONE, pages[PG_LEVEL_1G - 1], "pages_1g"),
KVM_STAT(VM, INSTANT, NONE, nx_lpage_splits),
KVM_STAT(VM, PEAK, NONE, max_mmu_rmap_size),
KVM_STAT(VM, PEAK, NONE, max_mmu_page_hash_collisions)
-- 
2.40.0.rc0.216.gc4246ad0f0-goog



[PATCH v2 3/4] KVM: Allow custom names for KVM_STAT()

2023-03-06 Thread David Matlack
Allow custom names to be specified for stats built on KVM_STAT() via a
new inner macro __KVM_STAT(). e.g.

  KVM_STAT(VM, CUMULATIVE, NONE, foo),
  __KVM_STAT(VM, CUMULATIVE, NONE, bar, "custom_name"),
  ...

Custom name support enables decoupling the userspace-visible stat names
from their internal representation in C. This can allow future commits
to refactor the various stats structs without impacting userspace tools
that read KVM stats.

This also allows stats to be stored in data structures such as arrays,
without needing unions to access specific stats. For example, the union
for pages_{4k,2m,1g} is no longer necessary. At Google, we have several
other out-of-tree stats that would benefit from this support.

No functional change intended.

Link: https://lore.kernel.org/all/20211019000459.3163029-1-jingzhan...@google.com/
Suggested-by: Jing Zhang 
Signed-off-by: David Matlack 
---
 include/linux/kvm_host.h | 35 +++
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6673ae757c4e..fa026e8997b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1761,40 +1761,43 @@ struct _kvm_stats_desc {
.name = _name, \
 }
 
-#define VM_GENERIC_STATS_DESC(_stat, _type, _unit, _base, _exponent, _size,
\
- _bucket_size)\
-   _KVM_STATS_DESC(struct kvm_vm_stat, generic._stat, #_stat, _type,  \
+#define VM_GENERIC_STATS_DESC(_stat, _name, _type, _unit, _base, _exponent,
\
+ _size, _bucket_size) \
+   _KVM_STATS_DESC(struct kvm_vm_stat, generic._stat, _name, _type,   \
_unit, _base, _exponent, _size, _bucket_size)
 
-#define VCPU_GENERIC_STATS_DESC(_stat, _type, _unit, _base, _exponent, _size,  
\
-   _bucket_size)  \
-   _KVM_STATS_DESC(struct kvm_vcpu_stat, generic._stat, #_stat, _type,\
+#define VCPU_GENERIC_STATS_DESC(_stat, _name, _type, _unit, _base, _exponent,  
\
+   _size, _bucket_size)   \
+   _KVM_STATS_DESC(struct kvm_vcpu_stat, generic._stat, _name, _type, \
_unit, _base, _exponent, _size, _bucket_size)
 
-#define VM_STATS_DESC(_stat, _type, _unit, _base, _exponent, _size,   \
+#define VM_STATS_DESC(_stat, _name, _type, _unit, _base, _exponent, _size, 
\
  _bucket_size)\
-   _KVM_STATS_DESC(struct kvm_vm_stat, _stat, #_stat, _type, _unit,   \
+   _KVM_STATS_DESC(struct kvm_vm_stat, _stat, _name, _type, _unit,\
_base, _exponent, _size, _bucket_size)
 
-#define VCPU_STATS_DESC(_stat, _type, _unit, _base, _exponent, _size, \
+#define VCPU_STATS_DESC(_stat, _name, _type, _unit, _base, _exponent, _size,   
\
_bucket_size)  \
-   _KVM_STATS_DESC(struct kvm_vcpu_stat, _stat, #_stat, _type, _unit, \
+   _KVM_STATS_DESC(struct kvm_vcpu_stat, _stat, _name, _type, _unit,  \
_base, _exponent, _size, _bucket_size)
 
 /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
-#define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz)
   \
-   SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
+#define STATS_DESC(SCOPE, stat, name, type, unit, base, exp, sz, bsz) \
+   SCOPE##_STATS_DESC(stat, name, type, unit, base, exp, sz, bsz)
 
-#define KVM_STAT(SCOPE, TYPE, UNIT, _stat)\
-   STATS_DESC(SCOPE, _stat, KVM_STATS_TYPE_##TYPE,\
+#define __KVM_STAT(SCOPE, TYPE, UNIT, _stat, _name)   \
+   STATS_DESC(SCOPE, _stat, _name, KVM_STATS_TYPE_##TYPE, \
   KVM_STATS_UNIT_##UNIT, KVM_STATS_BASE_POW10, 0, 1, 0)
 
+#define KVM_STAT(SCOPE, TYPE, UNIT, _stat)\
+   __KVM_STAT(SCOPE, TYPE, UNIT, _stat, #_stat)
+
 #define KVM_STAT_NSEC(SCOPE, _stat)   \
-   STATS_DESC(SCOPE, _stat, KVM_STATS_TYPE_CUMULATIVE,\
+   STATS_DESC(SCOPE, _stat, #_stat, KVM_STATS_TYPE_CUMULATIVE,\
   KVM_STATS_UNIT_SECONDS, KVM_STATS_BASE_POW10, -9, 1, 0)
 
 #define KVM_HIST_NSEC(SCOPE, TYPE, _stat, _size, _bucket_size)\
-   STATS_DESC(VCPU_GENERIC, _stat, KVM_STATS_TYPE_##TYPE##_HIST,  \
+   STATS_DESC(VCPU_GENERIC, _stat, #_stat, KVM_STATS_TYPE_##TYPE##_HIST,  \
   KVM_STATS_UNIT_SECONDS, KVM_STATS_BASE_POW10, -9,   \
   _size, _bucket_size)
 
-- 
2.40.0.rc0.216.g

[PATCH v2 1/4] KVM: Refactor stats descriptor generation macros

2023-03-06 Thread David Matlack
Refactor the various KVM stats macros to reduce the amount of duplicate
macro code. This change also improves readability by spelling out
"CUMULATIVE", "INSTANT", and "PEAK" instead of the previous short-hands
which were less clear ("COUNTER", "ICOUNTER", and "PCOUNTER").

No functional change intended.

Suggested-by: Sean Christopherson 
Signed-off-by: David Matlack 
---
 arch/arm64/kvm/guest.c|  14 +--
 arch/mips/kvm/mips.c  |  54 +--
 arch/powerpc/kvm/book3s.c |  62 ++--
 arch/powerpc/kvm/booke.c  |  48 -
 arch/riscv/kvm/vcpu.c |  16 +--
 arch/s390/kvm/kvm-s390.c  | 198 +++---
 arch/x86/kvm/x86.c|  94 +-
 include/linux/kvm_host.h  |  95 ++
 8 files changed, 272 insertions(+), 309 deletions(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07444fa22888..890ed444c237 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -44,13 +44,13 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
-   STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
-   STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
-   STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
-   STATS_DESC_COUNTER(VCPU, mmio_exit_user),
-   STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
-   STATS_DESC_COUNTER(VCPU, signal_exits),
-   STATS_DESC_COUNTER(VCPU, exits)
+   KVM_STAT(VCPU, CUMULATIVE, NONE, hvc_exit_stat),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, wfe_exit_stat),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, wfi_exit_stat),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, mmio_exit_user),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, mmio_exit_kernel),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, signal_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, exits)
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 36c8991b5d39..b7b2fa400bcf 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -53,34 +53,34 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
-   STATS_DESC_COUNTER(VCPU, wait_exits),
-   STATS_DESC_COUNTER(VCPU, cache_exits),
-   STATS_DESC_COUNTER(VCPU, signal_exits),
-   STATS_DESC_COUNTER(VCPU, int_exits),
-   STATS_DESC_COUNTER(VCPU, cop_unusable_exits),
-   STATS_DESC_COUNTER(VCPU, tlbmod_exits),
-   STATS_DESC_COUNTER(VCPU, tlbmiss_ld_exits),
-   STATS_DESC_COUNTER(VCPU, tlbmiss_st_exits),
-   STATS_DESC_COUNTER(VCPU, addrerr_st_exits),
-   STATS_DESC_COUNTER(VCPU, addrerr_ld_exits),
-   STATS_DESC_COUNTER(VCPU, syscall_exits),
-   STATS_DESC_COUNTER(VCPU, resvd_inst_exits),
-   STATS_DESC_COUNTER(VCPU, break_inst_exits),
-   STATS_DESC_COUNTER(VCPU, trap_inst_exits),
-   STATS_DESC_COUNTER(VCPU, msa_fpe_exits),
-   STATS_DESC_COUNTER(VCPU, fpe_exits),
-   STATS_DESC_COUNTER(VCPU, msa_disabled_exits),
-   STATS_DESC_COUNTER(VCPU, flush_dcache_exits),
-   STATS_DESC_COUNTER(VCPU, vz_gpsi_exits),
-   STATS_DESC_COUNTER(VCPU, vz_gsfc_exits),
-   STATS_DESC_COUNTER(VCPU, vz_hc_exits),
-   STATS_DESC_COUNTER(VCPU, vz_grr_exits),
-   STATS_DESC_COUNTER(VCPU, vz_gva_exits),
-   STATS_DESC_COUNTER(VCPU, vz_ghfc_exits),
-   STATS_DESC_COUNTER(VCPU, vz_gpa_exits),
-   STATS_DESC_COUNTER(VCPU, vz_resvd_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, wait_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, cache_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, signal_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, int_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, cop_unusable_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmod_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmiss_ld_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, tlbmiss_st_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, addrerr_st_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, addrerr_ld_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, syscall_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, resvd_inst_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, break_inst_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, trap_inst_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, msa_fpe_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, fpe_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, msa_disabled_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, flush_dcache_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_gpsi_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_gsfc_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_hc_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_grr_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_gva_exits),
+   KVM_STAT(VCPU, CUMULATIVE, NONE, vz_ghfc_exits),
+   KVM_

[PATCH v2 2/4] KVM: Refactor designated initializer macros for struct _kvm_stats_desc

2023-03-06 Thread David Matlack
Refactor the macros that generate struct _kvm_stats_desc designated
initializers to cut down on duplication.
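
For illustration, a rough sketch of what the consolidated macro emits
for one stat (the BUILD_BUG_ON_ZERO() flag checks are omitted for
brevity):

  /* KVM_STAT(VM, INSTANT, NONE, mmu_unsync) expands, roughly, to: */
  {
          {
                  .flags = KVM_STATS_TYPE_INSTANT | KVM_STATS_UNIT_NONE |
                           KVM_STATS_BASE_POW10,
                  .exponent = 0,
                  .size = 1,
                  .bucket_size = 0,
                  .offset = offsetof(struct kvm_vm_stat, mmu_unsync),
          },
          .name = "mmu_unsync",
  }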

No functional change intended.

Signed-off-by: David Matlack 
---
 include/linux/kvm_host.h | 75 +++-
 1 file changed, 35 insertions(+), 40 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 02b1151c2753..6673ae757c4e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1744,47 +1744,42 @@ struct _kvm_stats_desc {
char name[KVM_STATS_NAME_SIZE];
 };
 
-#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \
-   .flags = type | unit | base |  \
-BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |  \
-BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |  \
-BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),   \
-   .exponent = exp,   \
-   .size = sz,\
-   .bucket_size = bsz
-
-#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)   \
+/* Generates a designated initializer list for a struct _kvm_stats_desc. */
+#define _KVM_STATS_DESC(_struct, _field, _name, _type, _unit, _base,  \
+   _exponent, _size, _bucket_size)\
+{ \
{  \
-   {  \
-   STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
-   .offset = offsetof(struct kvm_vm_stat, generic.stat)   \
-   }, \
-   .name = #stat, \
-   }
-#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
-   {  \
-   {  \
-   STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
-   .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
-   }, \
-   .name = #stat, \
-   }
-#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)   \
-   {  \
-   {  \
-   STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
-   .offset = offsetof(struct kvm_vm_stat, stat)   \
-   }, \
-   .name = #stat, \
-   }
-#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
-   {  \
-   {  \
-   STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
-   .offset = offsetof(struct kvm_vcpu_stat, stat) \
-   }, \
-   .name = #stat, \
-   }
+   .flags = _type | _unit | _base |   \
+BUILD_BUG_ON_ZERO(_type & ~KVM_STATS_TYPE_MASK) | \
+BUILD_BUG_ON_ZERO(_unit & ~KVM_STATS_UNIT_MASK) | \
+BUILD_BUG_ON_ZERO(_base & ~KVM_STATS_BASE_MASK),  \
+   .exponent = _exponent, \
+   .size = _size, \
+   .bucket_size = _bucket_size,   \
+   .offset = offsetof(_struct, _field),   \
+   }, \
+   .name = _name, \
+}
+
+#define VM_GENERIC_STATS_DESC(_stat, _type, _unit, _base, _exponent, _size,
\
+ _bucket_size)\
+   _KVM_STATS_DESC(struct kvm_vm_stat, generic._stat, #_stat, _type,  \
+   _unit, _base, _exponent, _size, _bucket_size)
+
+#define VCPU_GENERIC_STATS_DESC(_stat, _type, _unit,

[PATCH v2 0/4] KVM: Refactor KVM stats macros and enable custom stat names

2023-03-06 Thread David Matlack
This series refactors the KVM stats macros to reduce duplication and
adds the support for choosing custom names for stats.

Custom name makes it possible to decouple the userspace-visible stat
names from their internal representation in C. This can allow future
commits to refactor the various stats structs without impacting
userspace tools that read KVM stats.

This also allows stats to be stored in data structures such as arrays,
without needing unions to access specific stats. Case in point, the last
patch in this series removes the pages_{4k,2m,1g} union, which is a
useful cleanup to prepare for sharing paging code across architectures
[1].

And for full transparency, another motivation for this series is that at
Google we have several out-of-tree stats that use arrays. Custom name
support is something we added internally and it reduces our technical
debt to get the support merged upstream.

Tested on x86. Compile tested on ARM. Not yet tested on any other
architectures.

Link: https://lore.kernel.org/kvm/20221208193857.4090582-1-dmatl...@google.com/

v2:
 - Refactor stat macros (patch 1) to reduce duplication and make it
   simpler to add custom name support [Sean]

v1: https://lore.kernel.org/kvm/20230118175300.790835-1-dmatl...@google.com/

David Matlack (4):
  KVM: Refactor stats descriptor generation macros
  KVM: Refactor designated initializer macros for struct _kvm_stats_desc
  KVM: Allow custom names for KVM_STAT()
  KVM: x86: Drop union for pages_{4k,2m,1g} stats

 arch/arm64/kvm/guest.c  |  14 +--
 arch/mips/kvm/mips.c|  54 -
 arch/powerpc/kvm/book3s.c   |  62 +-
 arch/powerpc/kvm/booke.c|  48 
 arch/riscv/kvm/vcpu.c   |  16 +--
 arch/s390/kvm/kvm-s390.c| 198 
 arch/x86/include/asm/kvm_host.h |   9 +-
 arch/x86/kvm/x86.c  |  94 +++
 include/linux/kvm_host.h| 179 +++--
 9 files changed, 314 insertions(+), 360 deletions(-)


base-commit: 45dd9bc75d9adc9483f0c7d662ba6e73ed698a0b
-- 
2.40.0.rc0.216.gc4246ad0f0-goog



Re: [PATCH V5 5/5] powerpc/kvm/stats: Implement existing and add new halt polling vcpu stats

2016-07-22 Thread David Matlack via Linuxppc-dev
On Thu, Jul 21, 2016 at 8:41 PM, Suraj Jitindar Singh
<sjitindarsi...@gmail.com> wrote:
> vcpu stats are used to collect information about a vcpu which can be viewed
> in the debugfs. For example halt_attempted_poll and halt_successful_poll
> are used to keep track of the number of times the vcpu attempts to and
> successfully polls. These stats are currently not used on powerpc.
>
> Implement incrementation of the halt_attempted_poll and
> halt_successful_poll vcpu stats for powerpc. Since these stats are summed
> over all the vcpus for all running guests it doesn't matter which vcpu
> they are attributed to, thus we choose the current runner vcpu of the
> vcore.
>
> Also add new vcpu stats: halt_poll_success_ns, halt_poll_fail_ns and
> halt_wait_ns to be used to accumulate the total time spent polling
> successfully, polling unsuccessfully and waiting respectively, and
> halt_successful_wait to accumulate the number of times the vcpu waits.
> Given that halt_poll_success_ns, halt_poll_fail_ns and halt_wait_ns are
> expressed in nanoseconds it is necessary to represent these as 64-bit
> quantities, otherwise they would overflow after only about 4 seconds.
>
> Given that the total time spent either polling or waiting will be known and
> the number of times that each was done, it will be possible to determine
> the average poll and wait times. This will give the ability to tune the kvm
> module parameters based on the calculated average wait and poll times.
>
> Signed-off-by: Suraj Jitindar Singh <sjitindarsi...@gmail.com>
> Reviewed-by: David Matlack <dmatl...@google.com>
>
> ---
> Change Log:
>
> V3 -> V4:
> - Instead of accounting just wait and poll time, separate these
>   into successful_poll_time, failed_poll_time and wait_time.
> V4 -> V5:
> - Add single_task_running() check to polling loop

I was expecting to see this in PATCH 3/5 with the halt-polling
implementation. But otherwise, looks good, and the net effect is the
same.

> ---
>  arch/powerpc/include/asm/kvm_host.h |  4 
>  arch/powerpc/kvm/book3s.c   |  4 
>  arch/powerpc/kvm/book3s_hv.c| 38 
> +++--
>  3 files changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index f6304c5..f15ffc0 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -114,8 +114,12 @@ struct kvm_vcpu_stat {
> u64 emulated_inst_exits;
> u64 dec_exits;
> u64 ext_intr_exits;
> +   u64 halt_poll_success_ns;
> +   u64 halt_poll_fail_ns;
> +   u64 halt_wait_ns;
> u64 halt_successful_poll;
> u64 halt_attempted_poll;
> +   u64 halt_successful_wait;
> u64 halt_poll_invalid;
> u64 halt_wakeup;
> u64 dbell_exits;
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 47018fc..71eb8f3 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> { "dec", VCPU_STAT(dec_exits) },
> { "ext_intr",VCPU_STAT(ext_intr_exits) },
> { "queue_intr",  VCPU_STAT(queue_intr) },
> +   { "halt_poll_success_ns",   VCPU_STAT(halt_poll_success_ns) },
> +   { "halt_poll_fail_ns",  VCPU_STAT(halt_poll_fail_ns) },
> +   { "halt_wait_ns",   VCPU_STAT(halt_wait_ns) },
> { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
> { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
> +   { "halt_successful_wait",   VCPU_STAT(halt_successful_wait) },
> { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
> { "halt_wakeup", VCPU_STAT(halt_wakeup) },
> { "pf_storage",  VCPU_STAT(pf_storage) },
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index a9de1d4..b1d9e88 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2679,15 +2679,16 @@ static int kvmppc_vcore_check_block(struct 
> kvmppc_vcore *vc)
>   */
>  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  {
> +   ktime_t cur, start_poll, start_wait;
> int do_sleep = 1;
> -   ktime_t cur, start;
> u64 block_ns;
> DECLARE_SWAITQUEUE(wait);
>
> /* Poll for pending exceptions and ceded state */
> -   cur = start = ktime_get();
> +   cur = start_poll = ktime_get();
> if (vc->halt_

Re: [PATCH V4 5/5] powerpc/kvm/stats: Implement existing and add new halt polling vcpu stats

2016-07-19 Thread David Matlack
On Tue, Jul 19, 2016 at 1:12 AM, Suraj Jitindar Singh
<sjitindarsi...@gmail.com> wrote:
> vcpu stats are used to collect information about a vcpu which can be viewed
> in the debugfs. For example halt_attempted_poll and halt_successful_poll
> are used to keep track of the number of times the vcpu attempts to and
> successfully polls. These stats are currently not used on powerpc.
>
> Implement incrementation of the halt_attempted_poll and
> halt_successful_poll vcpu stats for powerpc. Since these stats are summed
> over all the vcpus for all running guests it doesn't matter which vcpu
> they are attributed to, thus we choose the current runner vcpu of the
> vcore.
>
> Also add new vcpu stats: halt_poll_success_ns, halt_poll_fail_ns and
> halt_wait_ns to be used to accumulate the total time spent polling
> successfully, polling unsuccessfully and waiting respectively, and
> halt_successful_wait to accumulate the number of times the vcpu waits.
> Given that halt_poll_success_ns, halt_poll_fail_ns and halt_wait_ns are
> expressed in nanoseconds it is necessary to represent these as 64-bit
> quantities, otherwise they would overflow after only about 4 seconds.
>
> Given that the total time spent either polling or waiting will be known and
> the number of times that each was done, it will be possible to determine
> the average poll and wait times. This will give the ability to tune the kvm
> module parameters based on the calculated average wait and poll times.
>
> Signed-off-by: Suraj Jitindar Singh <sjitindarsi...@gmail.com>

Reviewed-by: David Matlack <dmatl...@google.com>

>
> ---
> Change Log:
>
> V3 -> V4:
> - Instead of accounting just wait and poll time, separate these
>   into successful_poll_time, failed_poll_time and wait_time.
> ---
>  arch/powerpc/include/asm/kvm_host.h |  4 
>  arch/powerpc/kvm/book3s.c   |  4 
>  arch/powerpc/kvm/book3s_hv.c| 36 +++-
>  3 files changed, 39 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index f6304c5..f15ffc0 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -114,8 +114,12 @@ struct kvm_vcpu_stat {
> u64 emulated_inst_exits;
> u64 dec_exits;
> u64 ext_intr_exits;
> +   u64 halt_poll_success_ns;
> +   u64 halt_poll_fail_ns;
> +   u64 halt_wait_ns;
> u64 halt_successful_poll;
> u64 halt_attempted_poll;
> +   u64 halt_successful_wait;
> u64 halt_poll_invalid;
> u64 halt_wakeup;
> u64 dbell_exits;
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 47018fc..71eb8f3 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> { "dec", VCPU_STAT(dec_exits) },
> { "ext_intr",VCPU_STAT(ext_intr_exits) },
> { "queue_intr",  VCPU_STAT(queue_intr) },
> +   { "halt_poll_success_ns",   VCPU_STAT(halt_poll_success_ns) },
> +   { "halt_poll_fail_ns",  VCPU_STAT(halt_poll_fail_ns) },
> +   { "halt_wait_ns",   VCPU_STAT(halt_wait_ns) },
> { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
> { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
> +   { "halt_successful_wait",   VCPU_STAT(halt_successful_wait) },
> { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
> { "halt_wakeup", VCPU_STAT(halt_wakeup) },
> { "pf_storage",  VCPU_STAT(pf_storage) },
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index a9de1d4..81072f2 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2679,15 +2679,16 @@ static int kvmppc_vcore_check_block(struct 
> kvmppc_vcore *vc)
>   */
>  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  {
> +   ktime_t cur, start_poll, start_wait;
> int do_sleep = 1;
> -   ktime_t cur, start;
> u64 block_ns;
> DECLARE_SWAITQUEUE(wait);
>
> /* Poll for pending exceptions and ceded state */
> -   cur = start = ktime_get();
> +   cur = start_poll = ktime_get();
> if (vc->halt_poll_ns) {
> -   ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
> +   ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
> +   ++vc->runner->stat.halt_attempted_p

Re: [PATCH V4 4/5] kvm/stats: Add provisioning for ulong vm stats and u64 vcpu stats

2016-07-19 Thread David Matlack via Linuxppc-dev
On Tue, Jul 19, 2016 at 1:12 AM, Suraj Jitindar Singh
<sjitindarsi...@gmail.com> wrote:
> vms and vcpus have statistics associated with them which can be viewed
> within the debugfs. Currently it is assumed within the vcpu_stat_get() and
> vm_stat_get() functions that all of these statistics are represented as
> u32s, however the next patch adds some u64 vcpu statistics.
>
> Change all vcpu statistics to u64 and modify vcpu_stat_get() accordingly.
> Since vcpu statistics are per vcpu, they will only be updated by a single
> vcpu at a time so this shouldn't present a problem on 32-bit machines
> which can't atomically increment 64-bit numbers. However vm statistics
> could potentially be updated by multiple vcpus from that vm at a time.
> To avoid the overhead of atomics make all vm statistics ulong such that
> they are 64-bit on 64-bit systems where they can be atomically incremented
> and are 32-bit on 32-bit systems which may not be able to atomically
> increment 64-bit numbers. Modify vm_stat_get() to expect ulongs.
>
> Signed-off-by: Suraj Jitindar Singh <sjitindarsi...@gmail.com>

Looks great, thanks.

Reviewed-by: David Matlack <dmatl...@google.com>

>
> ---
> Change Log:
>
> V2 -> V3:
> - Instead of implementing separate u32 and u64 functions keep the
>   generic functions and modify them to expect u64s. Thus update all
>   vm and vcpu statistics to u64s accordingly.
> V3 -> V4:
> - Change vm_stats from u64 to ulong
> ---
>  arch/arm/include/asm/kvm_host.h |  12 ++--
>  arch/arm64/include/asm/kvm_host.h   |  12 ++--
>  arch/mips/include/asm/kvm_host.h|  46 ++---
>  arch/powerpc/include/asm/kvm_host.h |  60 -
>  arch/s390/include/asm/kvm_host.h| 128 
> ++--
>  arch/x86/include/asm/kvm_host.h |  72 ++--
>  virt/kvm/kvm_main.c |   4 +-
>  7 files changed, 167 insertions(+), 167 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 96387d4..c8e55b3b 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -183,15 +183,15 @@ struct kvm_vcpu_arch {
>  };
>
>  struct kvm_vm_stat {
> -   u32 remote_tlb_flush;
> +   ulong remote_tlb_flush;
>  };
>
>  struct kvm_vcpu_stat {
> -   u32 halt_successful_poll;
> -   u32 halt_attempted_poll;
> -   u32 halt_poll_invalid;
> -   u32 halt_wakeup;
> -   u32 hvc_exit_stat;
> +   u64 halt_successful_poll;
> +   u64 halt_attempted_poll;
> +   u64 halt_poll_invalid;
> +   u64 halt_wakeup;
> +   u64 hvc_exit_stat;
> u64 wfe_exit_stat;
> u64 wfi_exit_stat;
> u64 mmio_exit_user;
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 49095fc..b14c8bc 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -291,15 +291,15 @@ struct kvm_vcpu_arch {
>  #endif
>
>  struct kvm_vm_stat {
> -   u32 remote_tlb_flush;
> +   ulong remote_tlb_flush;
>  };
>
>  struct kvm_vcpu_stat {
> -   u32 halt_successful_poll;
> -   u32 halt_attempted_poll;
> -   u32 halt_poll_invalid;
> -   u32 halt_wakeup;
> -   u32 hvc_exit_stat;
> +   u64 halt_successful_poll;
> +   u64 halt_attempted_poll;
> +   u64 halt_poll_invalid;
> +   u64 halt_wakeup;
> +   u64 hvc_exit_stat;
> u64 wfe_exit_stat;
> u64 wfi_exit_stat;
> u64 mmio_exit_user;
> diff --git a/arch/mips/include/asm/kvm_host.h 
> b/arch/mips/include/asm/kvm_host.h
> index 36a391d..9704888 100644
> --- a/arch/mips/include/asm/kvm_host.h
> +++ b/arch/mips/include/asm/kvm_host.h
> @@ -98,32 +98,32 @@ extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
>  extern bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
>
>  struct kvm_vm_stat {
> -   u32 remote_tlb_flush;
> +   ulong remote_tlb_flush;
>  };
>
>  struct kvm_vcpu_stat {
> -   u32 wait_exits;
> -   u32 cache_exits;
> -   u32 signal_exits;
> -   u32 int_exits;
> -   u32 cop_unusable_exits;
> -   u32 tlbmod_exits;
> -   u32 tlbmiss_ld_exits;
> -   u32 tlbmiss_st_exits;
> -   u32 addrerr_st_exits;
> -   u32 addrerr_ld_exits;
> -   u32 syscall_exits;
> -   u32 resvd_inst_exits;
> -   u32 break_inst_exits;
> -   u32 trap_inst_exits;
> -   u32 msa_fpe_exits;
> -   u32 fpe_exits;
> -   u32 msa_disabled_exits;
> -   u32 flush_dcache_exits;
> -   u32 halt_successful_poll;
> -   u32 halt_att

Re: [PATCH V4 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module

2016-07-19 Thread David Matlack via Linuxppc-dev
On Tue, Jul 19, 2016 at 1:12 AM, Suraj Jitindar Singh
 wrote:
> This patch introduces new halt polling functionality into the kvm_hv kernel
> module. When a vcore is idle it will poll for some period of time before
> scheduling itself out.
>
> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
> idle) we schedule ourselves out to allow something else to run. In the
> event that we need to wake up very quickly (for example an interrupt
> arrives), we are required to wait until we get scheduled again.
>
> Implement halt polling so that when a vcore is idle, and before scheduling
> ourselves, we poll for vcpus in the runnable_threads list which have
> pending exceptions or which leave the ceded state. If we poll successfully
> then we can get back into the guest very quickly without ever scheduling
> ourselves, otherwise we schedule ourselves out as before.
>
> Testing of this patch with a TCP round robin test between two guests with
> virtio network interfaces has found a decrease in round trip time of ~15us
> on average. A performance gain is only seen when going out of and
> back into the guest often and quickly, otherwise there is no net benefit
> from the polling. The polling interval is adjusted such that when we are
> often scheduled out for long periods of time it is reduced, and when we
> often poll successfully it is increased. The rate at which the polling
> interval increases or decreases, and the maximum polling interval, can
> be set through module parameters.
>
> Based on the implementation in the generic kvm module by Wanpeng Li and
> Paolo Bonzini, and on direction from Paul Mackerras.
>
> Signed-off-by: Suraj Jitindar Singh 
> ---
>  arch/powerpc/include/asm/kvm_book3s.h |   1 +
>  arch/powerpc/include/asm/kvm_host.h   |   1 +
>  arch/powerpc/kvm/book3s_hv.c  | 116 
> ++
>  arch/powerpc/kvm/trace_hv.h   |  22 +++
>  4 files changed, 126 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index 151f817..c261f52 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
> ulong pcr;
> ulong dpdes;/* doorbell state (POWER8) */
> ulong conferring_threads;
> +   unsigned int halt_poll_ns;
>  };
>
>  struct kvmppc_vcpu_book3s {
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 02d06e9..610f393 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ struct kvm_arch {
>  #define VCORE_SLEEPING 3
>  #define VCORE_RUNNING  4
>  #define VCORE_EXITING  5
> +#define VCORE_POLLING  6
>
>  /*
>   * Struct used to manage memory for a virtual processor area
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 3bcf9e6..a9de1d4 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, 
> &h_ipi_redirect,
>  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host 
> core");
>  #endif
>
> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
> +
> +/* Factor by which the vcore halt poll interval is grown, default is to 
> double
> + */
> +static unsigned int halt_poll_ns_grow = 2;
> +module_param(halt_poll_ns_grow, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
> +
> +/* Factor by which the vcore halt poll interval is shrunk, default is to 
> reset
> + */
> +static unsigned int halt_poll_ns_shrink;
> +module_param(halt_poll_ns_shrink, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
> +
>  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
>
> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore 
> *vc,
> finish_wait(&vcpu->arch.cpu_run, &wait);
>  }
>
> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> +   /* 10us base */
> +   if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
> +   vc->halt_poll_ns = 10000;
> +   else
> +   vc->halt_poll_ns *= halt_poll_ns_grow;
> +
> +   if (vc->halt_poll_ns > halt_poll_max_ns)
> +   vc->halt_poll_ns = halt_poll_max_ns;
> +}
> +
> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> +   if (halt_poll_ns_shrink == 0)
> +   vc->halt_poll_ns = 0;
> +   else
> +   vc->halt_poll_ns /= halt_poll_ns_shrink;
> +}
> +
> +/* Check 
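
To see how the grow/shrink logic above plays out, here is a minimal
stand-alone C sketch of the same algorithm. The 500000 ns cap is an
illustrative stand-in for KVM_HALT_POLL_NS_DEFAULT, the vcore struct is
reduced to the one field that matters, and only the two functions mirror
the patch; everything else is assumption.

  #include <stdio.h>

  /* Illustrative defaults; halt_poll_max_ns stands in for
   * KVM_HALT_POLL_NS_DEFAULT (assumed 500000 ns here). */
  static unsigned int halt_poll_max_ns = 500000;
  static unsigned int halt_poll_ns_grow = 2;
  static unsigned int halt_poll_ns_shrink; /* 0 => reset to 0 on shrink */

  struct vcore { unsigned int halt_poll_ns; };

  static void grow_halt_poll_ns(struct vcore *vc)
  {
      /* 10us base */
      if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
          vc->halt_poll_ns = 10000;
      else
          vc->halt_poll_ns *= halt_poll_ns_grow;

      if (vc->halt_poll_ns > halt_poll_max_ns)
          vc->halt_poll_ns = halt_poll_max_ns;
  }

  static void shrink_halt_poll_ns(struct vcore *vc)
  {
      if (halt_poll_ns_shrink == 0)
          vc->halt_poll_ns = 0;
      else
          vc->halt_poll_ns /= halt_poll_ns_shrink;
  }

  int main(void)
  {
      struct vcore vc = { 0 };
      int i;

      /* Eight consecutive successful polls: 10000, 20000, ... up to the cap. */
      for (i = 0; i < 8; i++) {
          grow_halt_poll_ns(&vc);
          printf("grow %d: %u ns\n", i + 1, vc.halt_poll_ns);
      }

      /* One halt that had to sleep resets the interval. */
      shrink_halt_poll_ns(&vc);
      printf("shrink: %u ns\n", vc.halt_poll_ns);
      return 0;
  }

With the defaults (grow factor 2, shrink factor 0), the interval doubles
from the 10us base on every halt where polling succeeded, saturates at the
cap, and collapses back to zero after a halt that ended up sleeping.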

Re: [PATCH V2 5/5] powerpc/kvm/stats: Implement existing and add new halt polling vcpu stats

2016-07-13 Thread David Matlack via Linuxppc-dev
On Tue, Jul 12, 2016 at 11:07 PM, Suraj Jitindar Singh
<sjitindarsi...@gmail.com> wrote:
> On 12/07/16 16:17, Suraj Jitindar Singh wrote:
>> On 12/07/16 02:49, David Matlack wrote:
[snip]
>>> It's possible to poll and wait in one halt, conflating this stat with
>>> polling time. Is it useful to split out a third stat,
>>> halt_poll_fail_ns, which counts how long we polled on halts that ended up
>>> sleeping? Then halt_wait_time only counts the time the VCPU spent on
>>> the wait queue. The sum of all 3 is still the total time spent halted.
>>>
>> I see what you're saying. I would say that in the event that you do wait,
>> the most useful number is going to be the total block time (the sum of
>> the wait and poll time), as this is the minimum value you would have to
>> set the halt_poll_max_ns module parameter to in order to ensure you poll
>> for long enough (in most circumstances) to avoid waiting, which is the
>> main use case I envision for this statistic. That being said, this is
>> definitely a source of ambiguity, and splitting this into two statistics
>> would make the distinction clearer without any loss of data; you could
>> simply sum the two stats to get the same number.
>>
>> Either way I don't think it really makes much of a difference, but in the
>> interest of clarity I think I'll split the statistic.
>
> On further thought, I really think that splitting this statistic is an
> unnecessary source of ambiguity. In reality the interesting piece of
> information is going to be the average time that you blocked on
> either an unsuccessful poll or a successful poll.
>
> So instead of splitting the statistic I'm going to rename them as:
> halt_poll_time -> halt_block_time_successful_poll
> halt_wait_time -> halt_block_time_waited

The downside of having only these 2 stats is there is no way to see
the total time spent halt-polling. Halt-polling shows up as host
kernel CPU usage on the VCPU thread, despite it really being idle
cycles that could be reclaimed. It's useful to have the total amount
of time spent halt-polling (halt_poll_fail + halt_poll_success) to
feed into provisioning/monitoring systems that look at CPU usage.

FWIW, I have a very similar patch internally. It adds 2 stats,
halt_poll_success_ns and halt_poll_fail_ns, to the halt-polling code
in virt/kvm/kvm_main.c. So if you agree splitting the stats makes
sense, it would be helpful to us if we can adopt the same naming
convention.
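
For concreteness, here is a rough user-space sketch of the three-bucket
split described above (poll time that avoided a sleep, poll time that did
not, and time actually spent blocked). The stat names follow the
description in this thread; the poll and sleep placeholders and all other
details are illustrative assumptions, not KVM code.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <time.h>

  struct halt_stats {
      uint64_t halt_poll_success_ns; /* polled and avoided sleeping */
      uint64_t halt_poll_fail_ns;    /* polled, then slept anyway */
      uint64_t halt_wait_ns;         /* time spent actually blocked */
  };

  static uint64_t now_ns(void)
  {
      struct timespec ts;
      clock_gettime(CLOCK_MONOTONIC, &ts);
      return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
  }

  /* Placeholder for "poll for a wakeup event for up to poll_ns". */
  static bool poll_for_event(uint64_t poll_ns)
  {
      uint64_t deadline = now_ns() + poll_ns;
      while (now_ns() < deadline)
          ;             /* spin; a real poll checks for pending work */
      return false;     /* pretend nothing arrived */
  }

  static void halt(struct halt_stats *st, uint64_t poll_ns)
  {
      uint64_t start = now_ns(), poll_end;
      bool got_event = poll_for_event(poll_ns);

      poll_end = now_ns();
      if (got_event) {
          st->halt_poll_success_ns += poll_end - start;
          return;
      }
      st->halt_poll_fail_ns += poll_end - start;

      /* Stand-in for blocking on a wait queue. */
      struct timespec sleep = { 0, 100000 };
      nanosleep(&sleep, NULL);
      st->halt_wait_ns += now_ns() - poll_end;
  }

  int main(void)
  {
      struct halt_stats st = { 0 };
      halt(&st, 50000);
      printf("poll_success=%llu poll_fail=%llu wait=%llu ns\n",
             (unsigned long long)st.halt_poll_success_ns,
             (unsigned long long)st.halt_poll_fail_ns,
             (unsigned long long)st.halt_wait_ns);
      return 0;
  }

The invariant noted above holds by construction: the three buckets sum to
the total time spent halted, and success + fail gives the host CPU time
burned by polling.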
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 4/5] kvm/stats: Add provisioning for 64-bit vcpu statistics

2016-07-11 Thread David Matlack via Linuxppc-dev
On Mon, Jul 11, 2016 at 12:31 PM, Paolo Bonzini <pbonz...@redhat.com> wrote:
>
>
> On 11/07/2016 19:30, David Matlack wrote:
>> On Mon, Jul 11, 2016 at 10:05 AM, Paolo Bonzini <pbonz...@redhat.com> wrote:
>>>
>>>
>>> On 11/07/2016 18:51, David Matlack wrote:
>>>>>> vcpus have statistics associated with them which can be viewed within the
>>>>>> debugfs. Currently it is assumed within the vcpu_stat_get() and
>>>>>> vcpu_stat_get_per_vm() functions that all of these statistics are
>>>>>> represented as 32-bit numbers. The next patch adds some 64-bit 
>>>>>> statistics,
>>>>>> so add provisioning for the display of 64-bit vcpu statistics.
>>>> Thanks, we need 64-bit stats in other places as well. Can we use this
>>>> opportunity to wholesale upgrade all KVM stats from u32 to u64? Most
>>>> of this patch is duplicated code with "u32" swapped with "u64".
>>>>
>>>
>>> I'm not sure of what 32-bit architectures would do, but perhaps we could
>>> upgrade them to unsigned long at least.
>>
>> I thought u64 still existed on 32-bit architectures. unsigned long
>> would be fine but with the caveat that certain stats would overflow on
>> 32-bit architectures.
>
> Yes, but not all 32-bit architectures can do atomic read-modify-write
> (e.g. add) operations on 64-bit values.

I think that's ok; none of the stats currently use atomic operations.

>
> Paolo
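
To illustrate the 32-bit concern: u64 is available on every architecture,
but a plain 64-bit add compiles to several instructions on a 32-bit ISA,
so a concurrent reader can observe a torn value. A minimal sketch, with
names that are stand-ins rather than KVM's:

  #include <stdint.h>
  #include <stdio.h>

  static uint64_t stat;          /* analogous to a vcpu stat field */

  static void stat_inc(uint64_t delta)
  {
      /* On a 32-bit ISA this is (at least) two loads, an add with
       * carry, and two stores -- not atomic without extra machinery. */
      stat += delta;
  }

  int main(void)
  {
      stat_inc(0x1ffffffffull);  /* crosses the low-word boundary */
      printf("stat = %llu\n", (unsigned long long)stat);
      return 0;
  }

Best-effort debugfs counters can tolerate the occasional torn read, which
is why taking no locks here is acceptable.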
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 4/5] kvm/stats: Add provisioning for 64-bit vcpu statistics

2016-07-11 Thread David Matlack
On Mon, Jul 11, 2016 at 10:05 AM, Paolo Bonzini <pbonz...@redhat.com> wrote:
>
>
> On 11/07/2016 18:51, David Matlack wrote:
>>> > vcpus have statistics associated with them which can be viewed within the
>>> > debugfs. Currently it is assumed within the vcpu_stat_get() and
>>> > vcpu_stat_get_per_vm() functions that all of these statistics are
>>> > represented as 32-bit numbers. The next patch adds some 64-bit statistics,
>>> > so add provisioning for the display of 64-bit vcpu statistics.
>> Thanks, we need 64-bit stats in other places as well. Can we use this
>> opportunity to wholesale upgrade all KVM stats from u32 to u64? Most
>> of this patch is duplicated code with "u32" swapped with "u64".
>>
>
> I'm not sure of what 32-bit architectures would do, but perhaps we could
> upgrade them to unsigned long at least.

I thought u64 still existed on 32-bit architectures. unsigned long
would be fine but with the caveat that certain stats would overflow on
32-bit architectures.

>
> Paolo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module

2016-07-11 Thread David Matlack via Linuxppc-dev
On Mon, Jul 11, 2016 at 10:07 AM, Paolo Bonzini <pbonz...@redhat.com> wrote:
>
>
> On 11/07/2016 18:57, David Matlack wrote:
>> On Mon, Jul 11, 2016 at 12:08 AM, Suraj Jitindar Singh
>> <sjitindarsi...@gmail.com> wrote:
>> > This patch introduces new halt polling functionality into the kvm_hv kernel
>> > module. When a vcore is idle it will poll for some period of time before
>> > scheduling itself out.
>>
>> Is there any way to reuse the existing halt-polling code? Having two
>> copies risks them diverging over time.
>
> s/risks/guarantees/ :(
>
> Unfortunately, handling of the hardware threads in KVM PPC is a mess,
> and I don't think it's possible to remove the duplication.

Ah, ok. That's a shame.

>
> Paolo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module

2016-07-11 Thread David Matlack via Linuxppc-dev
On Mon, Jul 11, 2016 at 12:08 AM, Suraj Jitindar Singh
 wrote:
> This patch introduces new halt polling functionality into the kvm_hv kernel
> module. When a vcore is idle it will poll for some period of time before
> scheduling itself out.

Is there any way to reuse the existing halt-polling code? Having two
copies risks them diverging over time.

>
> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
> idle) we schedule ourselves out to allow something else to run. In the
> event that we need to wake up very quickly (for example an interrupt
> arrives), we are required to wait until we get scheduled again.
>
> Implement halt polling so that when a vcore is idle, and before scheduling
> ourselves, we poll for vcpus in the runnable_threads list which have
> pending exceptions or which leave the ceded state. If we poll successfully
> then we can get back into the guest very quickly without ever scheduling
> ourselves, otherwise we schedule ourselves out as before.
>
> Testing of this patch with a TCP round robin test between two guests with
> virtio network interfaces has found a decrease in round trip time from
> ~140us to ~115us. A performance gain is only seen when going out of and
> back into the guest often and quickly, otherwise there is no net benefit
> from the polling. The polling interval is adjusted such that when we are
> often scheduled out for long periods of time it is reduced, and when we
> often poll successfully it is increased. The rate at which the polling
> interval increases or decreases, and the maximum polling interval, can
> be set through module parameters.
>
> Based on the implementation in the generic kvm module by Wanpeng Li and
> Paolo Bonzini, and on direction from Paul Mackerras.
>
> ---
> Change Log:
>
> V1 -> V2:
> - Nothing
>
> Signed-off-by: Suraj Jitindar Singh 
> ---
>  arch/powerpc/include/asm/kvm_book3s.h |   1 +
>  arch/powerpc/include/asm/kvm_host.h   |   1 +
>  arch/powerpc/kvm/book3s_hv.c  | 115 
> +-
>  arch/powerpc/kvm/trace_hv.h   |  22 +++
>  4 files changed, 125 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index 151f817..c261f52 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
> ulong pcr;
> ulong dpdes;/* doorbell state (POWER8) */
> ulong conferring_threads;
> +   unsigned int halt_poll_ns;
>  };
>
>  struct kvmppc_vcpu_book3s {
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 02d06e9..610f393 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ struct kvm_arch {
>  #define VCORE_SLEEPING 3
>  #define VCORE_RUNNING  4
>  #define VCORE_EXITING  5
> +#define VCORE_POLLING  6
>
>  /*
>   * Struct used to manage memory for a virtual processor area
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 3bcf9e6..0d8ce14 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, 
> &h_ipi_redirect,
>  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host 
> core");
>  #endif
>
> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
> +
> +/* Factor by which the vcore halt poll interval is grown, default is to 
> double
> + */
> +static unsigned int halt_poll_ns_grow = 2;
> +module_param(halt_poll_ns_grow, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
> +
> +/* Factor by which the vcore halt poll interval is shrunk, default is to 
> reset
> + */
> +static unsigned int halt_poll_ns_shrink;
> +module_param(halt_poll_ns_shrink, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
> +
>  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
>
> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore 
> *vc,
> finish_wait(&vcpu->arch.cpu_run, &wait);
>  }
>
> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> +   /* 10us base */
> +   if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
> +   vc->halt_poll_ns = 10000;
> +   else
> +   vc->halt_poll_ns *= halt_poll_ns_grow;
> +
> +   if (vc->halt_poll_ns > halt_poll_max_ns)
> +   vc->halt_poll_ns = halt_poll_max_ns;
> +}
> +
> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> + 

Re: [PATCH V2 4/5] kvm/stats: Add provisioning for 64-bit vcpu statistics

2016-07-11 Thread David Matlack
On Mon, Jul 11, 2016 at 12:08 AM, Suraj Jitindar Singh
 wrote:
> vcpus have statistics associated with them which can be viewed within the
> debugfs. Currently it is assumed within the vcpu_stat_get() and
> vcpu_stat_get_per_vm() functions that all of these statistics are
> represented as 32-bit numbers. The next patch adds some 64-bit statistics,
> so add provisioning for the display of 64-bit vcpu statistics.

Thanks, we need 64-bit stats in other places as well. Can we use this
opportunity to wholesale upgrade all KVM stats from u32 to u64? Most
of this patch is duplicated code with "u32" swapped with "u64".

>
> ---
> Change Log:
>
> V1 -> V2:
> - Nothing
>
> Signed-off-by: Suraj Jitindar Singh 
> ---
>  arch/powerpc/kvm/book3s.c |  1 +
>  include/linux/kvm_host.h  |  1 +
>  virt/kvm/kvm_main.c   | 60 
> +++
>  3 files changed, 58 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 47018fc..ed9132b 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -40,6 +40,7 @@
>  #include "trace.h"
>
>  #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
> +#define VCPU_STAT_U64(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU_U64
>
>  /* #define EXIT_DEBUG */
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1c9c973..667b30e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -991,6 +991,7 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, 
> gpa_t gpa)
>  enum kvm_stat_kind {
> KVM_STAT_VM,
> KVM_STAT_VCPU,
> +   KVM_STAT_VCPU_U64,
>  };
>
>  struct kvm_stat_data {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 48bd520..7ab5901 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3566,6 +3566,20 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
> return 0;
>  }
>
> +static int vcpu_stat_u64_get_per_vm(void *data, u64 *val)
> +{
> +   int i;
> +   struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
> +   struct kvm_vcpu *vcpu;
> +
> +   *val = 0;
> +
> +   kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
> +   *val += *(u64 *)((void *)vcpu + stat_data->offset);
> +
> +   return 0;
> +}
> +
>  static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
>  {
> __simple_attr_check_format("%llu\n", 0ull);
> @@ -3573,6 +3587,13 @@ static int vcpu_stat_get_per_vm_open(struct inode 
> *inode, struct file *file)
>  NULL, "%llu\n");
>  }
>
> +static int vcpu_stat_u64_get_per_vm_open(struct inode *inode, struct file 
> *file)
> +{
> +   __simple_attr_check_format("%llu\n", 0ull);
> +   return kvm_debugfs_open(inode, file, vcpu_stat_u64_get_per_vm,
> +NULL, "%llu\n");
> +}
> +
>  static const struct file_operations vcpu_stat_get_per_vm_fops = {
> .owner   = THIS_MODULE,
> .open= vcpu_stat_get_per_vm_open,
> @@ -3582,9 +3603,19 @@ static const struct file_operations 
> vcpu_stat_get_per_vm_fops = {
> .llseek  = generic_file_llseek,
>  };
>
> +static const struct file_operations vcpu_stat_u64_get_per_vm_fops = {
> +   .owner   = THIS_MODULE,
> +   .open= vcpu_stat_u64_get_per_vm_open,
> +   .release = kvm_debugfs_release,
> +   .read= simple_attr_read,
> +   .write   = simple_attr_write,
> +   .llseek  = generic_file_llseek,
> +};
> +
>  static const struct file_operations *stat_fops_per_vm[] = {
> -   [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
> -   [KVM_STAT_VM]   = &vm_stat_get_per_vm_fops,
> +   [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
> +   [KVM_STAT_VCPU_U64] = &vcpu_stat_u64_get_per_vm_fops,
> +   [KVM_STAT_VM]   = &vm_stat_get_per_vm_fops,
>  };
>
>  static int vm_stat_get(void *_offset, u64 *val)
> @@ -3627,9 +3658,30 @@ static int vcpu_stat_get(void *_offset, u64 *val)
>
>  DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
>
> +static int vcpu_stat_u64_get(void *_offset, u64 *val)
> +{
> +   unsigned offset = (long)_offset;
> +   struct kvm *kvm;
> +   struct kvm_stat_data stat_tmp = {.offset = offset};
> +   u64 tmp_val;
> +
> +   *val = 0;
> +   spin_lock(&kvm_lock);
> +   list_for_each_entry(kvm, &vm_list, vm_list) {
> +   stat_tmp.kvm = kvm;
> +   vcpu_stat_u64_get_per_vm((void *)&stat_tmp, &tmp_val);
> +   *val += tmp_val;
> +   }
> +   spin_unlock(&kvm_lock);
> +   return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_u64_fops, vcpu_stat_u64_get, NULL, 
> "%llu\n");
> +
>  static const struct file_operations *stat_fops[] = {
> -   [KVM_STAT_VCPU] = &vcpu_stat_fops,
> -   [KVM_STAT_VM]   = &vm_stat_fops,
> +   [KVM_STAT_VCPU] = &vcpu_stat_fops,
> +   
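
To make the suggested deduplication concrete: if every stat were u64, as
proposed up-thread, one offset-based reader could serve them all and the
parallel *_u64 accessors above would disappear. A simplified stand-alone
sketch (struct layout and names are hypothetical, not the kernel's):

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  struct vcpu_stat {
      uint64_t halt_attempted_poll;
      uint64_t halt_successful_poll;
  };

  struct vcpu {
      struct vcpu_stat stat;
  };

  /* One reader for any stat field, addressed by offset -- no u32/u64
   * split, so no duplicated fops tables. */
  static uint64_t vcpu_stat_read(struct vcpu *vcpu, size_t offset)
  {
      return *(uint64_t *)((char *)&vcpu->stat + offset);
  }

  int main(void)
  {
      struct vcpu v = { .stat = { .halt_attempted_poll = 42 } };
      printf("%llu\n", (unsigned long long)vcpu_stat_read(&v,
             offsetof(struct vcpu_stat, halt_attempted_poll)));
      return 0;
  }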

Re: [PATCH V2 5/5] powerpc/kvm/stats: Implement existing and add new halt polling vcpu stats

2016-07-11 Thread David Matlack via Linuxppc-dev
On Mon, Jul 11, 2016 at 12:08 AM, Suraj Jitindar Singh
 wrote:
> vcpu stats are used to collect information about a vcpu which can be viewed
> in the debugfs. For example halt_attempted_poll and halt_successful_poll
> are used to keep track of the number of times the vcpu attempts a poll and
> the number of times it polls successfully. These stats are currently not
> used on powerpc.
>
> Implement incrementation of the halt_attempted_poll and
> halt_successful_poll vcpu stats for powerpc. Since these stats are summed
> over all the vcpus for all running guests, it doesn't matter which vcpu
> they are attributed to; thus we choose the current runner vcpu of the
> vcore.
>
> Also add new vcpu stats: halt_poll_time and halt_wait_time to be used to
> accumulate the total time spent polling and waiting respectively, and
> halt_successful_wait to accumulate the number of times the vcpu waits.
> Given that halt_poll_time and halt_wait_time are expressed in nanoseconds
> it is necessary to represent these as 64-bit quantities, otherwise they
> would overflow after only about 4 seconds.
>
> Given that the total time spent either polling or waiting will be known and
> the number of times that each was done, it will be possible to determine
> the average poll and wait times. This will give the ability to tune the kvm
> module parameters based on the calculated average wait and poll times.
>
> ---
> Change Log:
>
> V1 -> V2:
> - Nothing
>
> Signed-off-by: Suraj Jitindar Singh 
> ---
>  arch/powerpc/include/asm/kvm_host.h |  3 +++
>  arch/powerpc/kvm/book3s.c   |  3 +++
>  arch/powerpc/kvm/book3s_hv.c| 14 +-
>  3 files changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 610f393..66a7198 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -114,8 +114,11 @@ struct kvm_vcpu_stat {
> u32 emulated_inst_exits;
> u32 dec_exits;
> u32 ext_intr_exits;
> +   u64 halt_poll_time;
> +   u64 halt_wait_time;
> u32 halt_successful_poll;
> u32 halt_attempted_poll;
> +   u32 halt_successful_wait;
> u32 halt_poll_invalid;
> u32 halt_wakeup;
> u32 dbell_exits;
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index ed9132b..6217bea 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -53,8 +53,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> { "dec", VCPU_STAT(dec_exits) },
> { "ext_intr",VCPU_STAT(ext_intr_exits) },
> { "queue_intr",  VCPU_STAT(queue_intr) },
> +   { "halt_poll_time_ns",  VCPU_STAT_U64(halt_poll_time) },
> +   { "halt_wait_time_ns",  VCPU_STAT_U64(halt_wait_time) },
> { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
> { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
> +   { "halt_successful_wait",   VCPU_STAT(halt_successful_wait) },
> { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
> { "halt_wakeup", VCPU_STAT(halt_wakeup) },
> { "pf_storage",  VCPU_STAT(pf_storage) },
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 0d8ce14..a0dae63 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2688,6 +2688,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
> *vc)
> cur = start = ktime_get();
> if (vc->halt_poll_ns) {
> ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
> +   ++vc->runner->stat.halt_attempted_poll;
>
> vc->vcore_state = VCORE_POLLING;
> spin_unlock(&vc->lock);
> @@ -2703,8 +2704,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
> *vc)
> spin_lock(&vc->lock);
> vc->vcore_state = VCORE_INACTIVE;
>
> -   if (!do_sleep)
> +   if (!do_sleep) {
> +   ++vc->runner->stat.halt_successful_poll;
> goto out;
> +   }
> }
>
> prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> @@ -2712,6 +2715,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
> *vc)
> if (kvmppc_vcore_check_block(vc)) {
> finish_swait(&vc->wq, &wait);
> do_sleep = 0;
> +   /* If we polled, count this as a successful poll */
> +   if (vc->halt_poll_ns)
> +   ++vc->runner->stat.halt_successful_poll;
> goto out;
> }
>
> @@ -2723,12 +2729,18 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore 
> *vc)
> spin_lock(&vc->lock);
> vc->vcore_state = VCORE_INACTIVE;
> trace_kvmppc_vcore_blocked(vc, 1);
> +   ++vc->runner->stat.halt_successful_wait;
>
> cur = ktime_get();
>
>  out:
>