Re: [PATCH] KVM: ppc64: Enable ring-based dirty memory tracking

2023-07-16 Thread Kautuk Consul
Hi Jordan,

On 2023-07-06 14:15:13, Jordan Niethe wrote:
> 
> 
> On 8/6/23 10:34 pm, Kautuk Consul wrote:
> 
> Need at least a little context in the commit message itself:
> 
> "Enable ring-based dirty memory tracking on ppc64:"
Sure will take this in the v2 patch.
> 
> > - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL as ppc64 is weakly
> >ordered.
> > - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP because
> >    kvmppc_xive_native_set_attr() is called in the context of an ioctl
> >    syscall and will call kvmppc_xive_native_eq_sync() for setting the
> >    KVM_DEV_XIVE_EQ_SYNC attribute, which will call mark_page_dirty()
> >    when there isn't a running vcpu. Implement
> >    kvm_arch_allow_write_without_running_vcpu() to always return true
> >    to allow mark_page_dirty_in_slot() to mark the page dirty in
> >    memslot->dirty_bitmap in this case.
> 
> Should kvm_arch_allow_write_without_running_vcpu() only return true in the
> context of kvmppc_xive_native_eq_sync()?
Not required. Reason is: kvm_arch_allow_write_without_running_vcpu() is
used only to avoid the WARN_ON_ONCE() in mark_page_dirty_in_slot().
The memslot->dirty_bitmap in mark_page_dirty_in_slot() is written only
when KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP is set and the vcpu returned by
kvm_get_running_vcpu() is NULL, which happens only when
kvmppc_xive_native_eq_sync() is reached via the ioctl syscall I
mentioned.

> > +   *ptep = __pte(pte_val(*ptep) & ~(_PAGE_WRITE));
> On rpt I think you'd need to use kvmppc_radix_update_pte()?
Sure. I'll add a check for radix_enabled() and call
kvmppc_radix_update_pte() or a similar function in the v2 patch for this.
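A minimal sketch of that direction (only the idea being discussed, not
the actual v2 patch; it assumes the existing find_kvm_secondary_pte()
and kvmppc_radix_update_pte() helpers):

    ptep = find_kvm_secondary_pte(kvm, start, &shift);
    if (ptep && pte_present(*ptep) && pte_write(*ptep)) {
        if (radix_enabled())
            /* Atomic PTE update; the remote TLB flush happens
             * later via kvm_arch_flush_remote_tlbs_memslot(). */
            kvmppc_radix_update_pte(kvm, ptep, _PAGE_WRITE, 0,
                                    start, shift);
        else
            *ptep = __pte(pte_val(*ptep) & ~_PAGE_WRITE);
    }
    start += PAGE_SIZE;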


Re: [PATCH] KVM: ppc64: Enable ring-based dirty memory tracking

2023-07-05 Thread Jordan Niethe

On 8/6/23 10:34 pm, Kautuk Consul wrote:

Need at least a little context in the commit message itself:

"Enable ring-based dirty memory tracking on ppc64:"


- Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL as ppc64 is weakly
   ordered.
- Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP because
   kvmppc_xive_native_set_attr() is called in the context of an ioctl
   syscall and will call kvmppc_xive_native_eq_sync() for setting the
   KVM_DEV_XIVE_EQ_SYNC attribute, which will call mark_page_dirty()
   when there isn't a running vcpu. Implement
   kvm_arch_allow_write_without_running_vcpu() to always return true
   to allow mark_page_dirty_in_slot() to mark the page dirty in
   memslot->dirty_bitmap in this case.


Should kvm_arch_allow_write_without_running_vcpu() only return true in 
the context of kvmppc_xive_native_eq_sync()?



- Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page
   offset.
- Implement the kvm_arch_mmu_enable_log_dirty_pt_masked function required
   for the generic KVM code to call.
- Add a check to kvmppc_vcpu_run_hv for checking whether the dirty
   ring is soft full.
- Implement the kvm_arch_flush_remote_tlbs_memslot function to support
   the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT config option.

On testing with live migration it was found that there is around
150-180 ms improvement in overall migration time with this patch.

Signed-off-by: Kautuk Consul 
---
  Documentation/virt/kvm/api.rst  |  2 +-
  arch/powerpc/include/uapi/asm/kvm.h |  2 ++
  arch/powerpc/kvm/Kconfig|  2 ++
  arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 +
  arch/powerpc/kvm/book3s_hv.c|  3 +++
  include/linux/kvm_dirty_ring.h  |  5 
  6 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index add067793b90..ce1ebc513bae 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8114,7 +8114,7 @@ regardless of what has actually been exposed through the CPUID leaf.
  8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
  -----------------------------------------------------------
  
-:Architectures: x86, arm64
+:Architectures: x86, arm64, ppc64
  :Parameters: args[0] - size of the dirty log ring
  
  KVM is capable of tracking dirty memory using ring buffers that are

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 9f18fa090f1f..f722309ed7fb 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -33,6 +33,8 @@
  /* Not always available, but if it is, this is the correct offset.  */
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
+
  struct kvm_regs {
__u64 pc;
__u64 cr;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 902611954200..c93354ec3bd5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -26,6 +26,8 @@ config KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select INTERVAL_TREE
+   select HAVE_KVM_DIRTY_RING_ACQ_REL
+   select NEED_KVM_DIRTY_RING_WITH_BITMAP
  
  config KVM_BOOK3S_HANDLER
	bool
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f765d5ad436..c92e8022e017 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -2147,3 +2147,45 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
  
	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
  }
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It write protects selected pages to enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+					     struct kvm_memory_slot *slot,
+					     gfn_t gfn_offset,
+					     unsigned long mask)
+{
+   phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+   phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
+   phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+   while (start < end) {
+   pte_t *ptep;
+   unsigned int shift;
+
+   ptep = find_kvm_secondary_pte(kvm, start, &shift);
+
+   *ptep = __pte(pte_val(*ptep) & ~(_PAGE_WRITE));

On rpt I think you'd need to use kvmppc_radix_update_pte()?


+
+   start += PAGE_SIZE;
+   }
+}
+
+#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
+{
+   return true;
+}
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+   const struct kvm_memory_slot *memslot)
+{
+   kvm_flush_remote_tlbs(kvm);
+}
+#endif
diff --git 

Re: [PATCH] KVM: ppc64: Enable ring-based dirty memory tracking

2023-07-02 Thread Kautuk Consul
Hi Everyone,

On 2023-06-08 08:34:48, Kautuk Consul wrote:
> - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL as ppc64 is weakly
>   ordered.
> - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP because
>   kvmppc_xive_native_set_attr() is called in the context of an ioctl
>   syscall and will call kvmppc_xive_native_eq_sync() for setting the
>   KVM_DEV_XIVE_EQ_SYNC attribute, which will call mark_page_dirty()
>   when there isn't a running vcpu. Implement
>   kvm_arch_allow_write_without_running_vcpu() to always return true
>   to allow mark_page_dirty_in_slot() to mark the page dirty in
>   memslot->dirty_bitmap in this case.
> - Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page
>   offset.
> - Implement the kvm_arch_mmu_enable_log_dirty_pt_masked function required
>   for the generic KVM code to call.
> - Add a check to kvmppc_vcpu_run_hv for checking whether the dirty
>   ring is soft full.
> - Implement the kvm_arch_flush_remote_tlbs_memslot function to support
>   the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT config option.
> 
> On testing with live migration it was found that there is around
> 150-180 ms improvement in overall migration time with this patch.
> 
> Signed-off-by: Kautuk Consul 
Can someone review this?


Re: [PATCH] KVM: ppc64: Enable ring-based dirty memory tracking

2023-06-18 Thread Kautuk Consul
Hi Nick/Gavin/Everyone,

On 2023-06-08 08:34:48, Kautuk Consul wrote:
> - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL as ppc64 is weakly
>   ordered.
> - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP because
>   kvmppc_xive_native_set_attr() is called in the context of an ioctl
>   syscall and will call kvmppc_xive_native_eq_sync() for setting the
>   KVM_DEV_XIVE_EQ_SYNC attribute, which will call mark_page_dirty()
>   when there isn't a running vcpu. Implement
>   kvm_arch_allow_write_without_running_vcpu() to always return true
>   to allow mark_page_dirty_in_slot() to mark the page dirty in
>   memslot->dirty_bitmap in this case.
> - Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page
>   offset.
> - Implement the kvm_arch_mmu_enable_log_dirty_pt_masked function required
>   for the generic KVM code to call.
> - Add a check to kvmppc_vcpu_run_hv for checking whether the dirty
>   ring is soft full.
> - Implement the kvm_arch_flush_remote_tlbs_memslot function to support
>   the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT config option.
> 
> On testing with live migration it was found that there is around
> 150-180 ms improvement in overall migration time with this patch.
> 
> Signed-off-by: Kautuk Consul 
> ---
>  Documentation/virt/kvm/api.rst  |  2 +-
>  arch/powerpc/include/uapi/asm/kvm.h |  2 ++
>  arch/powerpc/kvm/Kconfig|  2 ++
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 +
>  arch/powerpc/kvm/book3s_hv.c|  3 +++
>  include/linux/kvm_dirty_ring.h  |  5 
>  6 files changed, 55 insertions(+), 1 deletion(-)
> 
Any review comments on this?
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index add067793b90..ce1ebc513bae 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -8114,7 +8114,7 @@ regardless of what has actually been exposed through the CPUID leaf.
>  8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
>  -----------------------------------------------------------
> 
> -:Architectures: x86, arm64
> +:Architectures: x86, arm64, ppc64
>  :Parameters: args[0] - size of the dirty log ring
> 
>  KVM is capable of tracking dirty memory using ring buffers that are
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 9f18fa090f1f..f722309ed7fb 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -33,6 +33,8 @@
>  /* Not always available, but if it is, this is the correct offset.  */
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
> 
> +#define KVM_DIRTY_LOG_PAGE_OFFSET 64
> +
>  struct kvm_regs {
>   __u64 pc;
>   __u64 cr;
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 902611954200..c93354ec3bd5 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -26,6 +26,8 @@ config KVM
>   select IRQ_BYPASS_MANAGER
>   select HAVE_KVM_IRQ_BYPASS
>   select INTERVAL_TREE
> + select HAVE_KVM_DIRTY_RING_ACQ_REL
> + select NEED_KVM_DIRTY_RING_WITH_BITMAP
> 
>  config KVM_BOOK3S_HANDLER
>   bool
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 7f765d5ad436..c92e8022e017 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -2147,3 +2147,45 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
> 
>   vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
>  }
> +
> +/*
> + * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
> + * dirty pages.
> + *
> + * It write protects selected pages to enable dirty logging for them.
> + */
> +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
> +  struct kvm_memory_slot *slot,
> +  gfn_t gfn_offset,
> +  unsigned long mask)
> +{
> + phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
> + phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
> + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
> +
> + while (start < end) {
> + pte_t *ptep;
> + unsigned int shift;
> +
> + ptep = find_kvm_secondary_pte(kvm, start, &shift);
> +
> + *ptep = __pte(pte_val(*ptep) & ~(_PAGE_WRITE));
> +
> + start += PAGE_SIZE;
> + }
> +}
> +
> +#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
> +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
> +{
> + return true;
> +}
> +#endif
> +
> +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
> +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
> + const struct kvm_memory_slot *memslot)
> +{
> + kvm_flush_remote_tlbs(kvm);
> +}
> +#endif
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 

[PATCH] KVM: ppc64: Enable ring-based dirty memory tracking

2023-06-08 Thread Kautuk Consul
- Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL as ppc64 is weakly
  ordered.
- Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP because
  kvmppc_xive_native_set_attr() is called in the context of an ioctl
  syscall and will call kvmppc_xive_native_eq_sync() for setting the
  KVM_DEV_XIVE_EQ_SYNC attribute, which will call mark_page_dirty()
  when there isn't a running vcpu. Implement
  kvm_arch_allow_write_without_running_vcpu() to always return true
  to allow mark_page_dirty_in_slot() to mark the page dirty in
  memslot->dirty_bitmap in this case.
- Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page
  offset (see the userspace sketch below).
- Implement the kvm_arch_mmu_enable_log_dirty_pt_masked function required
  for the generic KVM code to call.
- Add a check to kvmppc_vcpu_run_hv for checking whether the dirty
  ring is soft full.
- Implement the kvm_arch_flush_remote_tlbs_memslot function to support
  the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT config option.

On testing with live migration it was found that there is around
150-180 ms improvement in overall migration time with this patch.
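
For context, a rough sketch of how a VMM consumes this ABI (generic KVM
uapi, error handling elided; only a sketch, not part of this patch):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Enable the ring for the whole VM, before any vcpu is created. */
    static int enable_dirty_ring(int vm_fd, unsigned int ring_bytes)
    {
        struct kvm_enable_cap cap = {
            .cap = KVM_CAP_DIRTY_LOG_RING_ACQ_REL,
            .args = { ring_bytes },   /* power-of-2 per-vcpu ring size */
        };
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

    /* Map a vcpu's array of struct kvm_dirty_gfn entries; this is
     * where the KVM_DIRTY_LOG_PAGE_OFFSET added above is used. */
    static struct kvm_dirty_gfn *map_dirty_ring(int vcpu_fd,
                                                unsigned int ring_bytes)
    {
        return mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE,
                    MAP_SHARED, vcpu_fd,
                    (off_t)KVM_DIRTY_LOG_PAGE_OFFSET * getpagesize());
    }

Harvested entries are returned to KVM with the KVM_RESET_DIRTY_RINGS vm
ioctl; pages dirtied with no running vcpu (the XIVE case above) are
reported through the per-slot bitmap instead, which additionally
requires enabling KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP.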

Signed-off-by: Kautuk Consul 
---
 Documentation/virt/kvm/api.rst  |  2 +-
 arch/powerpc/include/uapi/asm/kvm.h |  2 ++
 arch/powerpc/kvm/Kconfig|  2 ++
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 +
 arch/powerpc/kvm/book3s_hv.c|  3 +++
 include/linux/kvm_dirty_ring.h  |  5 
 6 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index add067793b90..ce1ebc513bae 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8114,7 +8114,7 @@ regardless of what has actually been exposed through the CPUID leaf.
 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
 -----------------------------------------------------------
 
-:Architectures: x86, arm64
+:Architectures: x86, arm64, ppc64
 :Parameters: args[0] - size of the dirty log ring
 
 KVM is capable of tracking dirty memory using ring buffers that are
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 9f18fa090f1f..f722309ed7fb 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -33,6 +33,8 @@
 /* Not always available, but if it is, this is the correct offset.  */
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
+
 struct kvm_regs {
__u64 pc;
__u64 cr;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 902611954200..c93354ec3bd5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -26,6 +26,8 @@ config KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select INTERVAL_TREE
+   select HAVE_KVM_DIRTY_RING_ACQ_REL
+   select NEED_KVM_DIRTY_RING_WITH_BITMAP
 
 config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f765d5ad436..c92e8022e017 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -2147,3 +2147,45 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
 }
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It write protects selected pages to enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+					     struct kvm_memory_slot *slot,
+					     gfn_t gfn_offset,
+					     unsigned long mask)
+{
+   phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+   phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
+   phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+   while (start < end) {
+   pte_t *ptep;
+   unsigned int shift;
+
+   ptep = find_kvm_secondary_pte(kvm, start, &shift);
+
+   *ptep = __pte(pte_val(*ptep) & ~(_PAGE_WRITE));
+
+   start += PAGE_SIZE;
+   }
+}
+
+#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
+{
+   return true;
+}
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+   const struct kvm_memory_slot *memslot)
+{
+   kvm_flush_remote_tlbs(kvm);
+}
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 130bafdb1430..1d1264ea72c4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4804,6 +4804,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
return -EINTR;
}
 
+   if (kvm_dirty_ring_check_request(vcpu))
+   return 0;
+
 #ifdef