Keqian,

In the future, please Cc me on your KVM/arm64 patches, as well as
all the reviewers mentioned in the MAINTAINERS file.

On 2020-03-09 08:57, Keqian Zhu wrote:
> There is already support of enabling dirty log graually

gradually?

> in small chunks for x86. This adds support for arm64.
>
> Under the Huawei Kunpeng 920 2.6GHz platform, I did some
> tests with a 128G linux VM and counted the time taken of

Linux

> memory_global_dirty_log_start, here is the numbers:
>
> VM Size        Before    After optimization
> 128G           527ms     4ms

What does this benchmark do? Can you please provide a pointer to it?
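If this is just the wall-clock time of the call inside QEMU, I would
expect something like the fragment below around the call site (this is
a guess at the methodology, not the author's code):

/* Hypothetical timing of dirty log start; g_get_monotonic_time()
 * is GLib's monotonic clock, in microseconds.
 */
gint64 before = g_get_monotonic_time();
memory_global_dirty_log_start();
info_report("memory_global_dirty_log_start: %" PRId64 " us",
            g_get_monotonic_time() - before);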


> Signed-off-by: Keqian Zhu <[email protected]>
> ---
> Cc: Jay Zhou <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Peter Xu <[email protected]>
> Cc: Sean Christopherson <[email protected]>
> ---
>  Documentation/virt/kvm/api.rst    |  2 +-
>  arch/arm64/include/asm/kvm_host.h |  4 ++++
>  virt/kvm/arm/mmu.c                | 30 ++++++++++++++++++++++--------
>  3 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 0adef66585b1..89d4f2680af1 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -5735,7 +5735,7 @@ will be initialized to 1 when created.  This also improves performance because
>  dirty logging can be enabled gradually in small chunks on the first call
>  to KVM_CLEAR_DIRTY_LOG.  KVM_DIRTY_LOG_INITIALLY_SET depends on
>  KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
> -x86 for now).
> +x86 and arm64 for now).

What is this based on? I can't find this in -next, and you provide no
context whatsoever.

I assume this is related to this series:
https://lore.kernel.org/kvm/[email protected]/

Is there a userspace counterpart to it?
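For context, the userspace opt-in would presumably look like the sketch
below. This is illustrative only: the two KVM_DIRTY_LOG_* flag names
are assumed from the x86 series above, not taken from anything in -next:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: opt in to manual dirty-log protection with all bits
 * initially set. Flag names are assumed from the x86 series.
 */
static int enable_dirty_log_initially_set(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
		/* args[0] carries the requested KVM_DIRTY_LOG_* bitmask */
		.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
			   KVM_DIRTY_LOG_INITIALLY_SET,
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}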

>
>  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
>  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d87aa609d2b6..0deb2ac7d091 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -16,6 +16,7 @@
>  #include <linux/jump_label.h>
>  #include <linux/kvm_types.h>
>  #include <linux/percpu.h>
> +#include <linux/kvm.h>
>  #include <asm/arch_gicv3.h>
>  #include <asm/barrier.h>
>  #include <asm/cpufeature.h>
> @@ -45,6 +46,9 @@
>  #define KVM_REQ_VCPU_RESET     KVM_ARCH_REQ(2)
>  #define KVM_REQ_RECORD_STEAL   KVM_ARCH_REQ(3)
>
> +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
> +                                       KVM_DIRTY_LOG_INITIALLY_SET)
> +
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
>  extern unsigned int kvm_sve_max_vl;
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index e3b9ee268823..5c7ca84dec85 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -1438,9 +1438,11 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
>   * @pud:       pointer to pud entry
>   * @addr:      range start address
>   * @end:       range end address
> + * @wp_ptes:   write protect ptes or not
>   */
>  static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
> -                          phys_addr_t addr, phys_addr_t end)
> +                          phys_addr_t addr, phys_addr_t end,
> +                          bool wp_ptes)

If you are going to pass extra parameters like this, make it at least
extensible (unsigned long flags, for example).
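Something like this, say (an illustrative fragment only; the flag name
is made up):

/* Sketch: a flags word instead of a bool leaves room for future
 * behaviours. KVM_S2_FLAG_SKIP_PTES is a hypothetical name for
 * the current wp_ptes == false case.
 */
#define KVM_S2_FLAG_SKIP_PTES	BIT(0)

static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
			   phys_addr_t addr, phys_addr_t end,
			   unsigned long flags)
{
	...
			} else if (!(flags & KVM_S2_FLAG_SKIP_PTES)) {
				stage2_wp_ptes(pmd, addr, next);
			}
	...
}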

>  {
>         pmd_t *pmd;
>         phys_addr_t next;
> @@ -1453,7 +1455,7 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
>                         if (pmd_thp_or_huge(*pmd)) {
>                                 if (!kvm_s2pmd_readonly(pmd))
>                                         kvm_set_s2pmd_readonly(pmd);
> -                       } else {
> +                       } else if (wp_ptes) {
>                                 stage2_wp_ptes(pmd, addr, next);
>                         }
>                 }
> @@ -1465,9 +1467,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
>   * @pgd:       pointer to pgd entry
>   * @addr:      range start address
>   * @end:       range end address
> + * @wp_ptes:   write protect ptes or not
>   */
>  static void  stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
> -                           phys_addr_t addr, phys_addr_t end)
> +                           phys_addr_t addr, phys_addr_t end,
> +                           bool wp_ptes)
>  {
>         pud_t *pud;
>         phys_addr_t next;
> @@ -1480,7 +1484,7 @@ static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
>                                 if (!kvm_s2pud_readonly(pud))
>                                         kvm_set_s2pud_readonly(pud);
>                         } else {
> -                               stage2_wp_pmds(kvm, pud, addr, next);
> +                               stage2_wp_pmds(kvm, pud, addr, next, wp_ptes);
>                         }
>                 }
>         } while (pud++, addr = next, addr != end);
> @@ -1491,8 +1495,10 @@ static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
>   * @kvm:       The KVM pointer
>   * @addr:      Start address of range
>   * @end:       End address of range
> + * @wp_ptes:   Write protect ptes or not
>   */
> -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr,
> +                           phys_addr_t end, bool wp_ptes)
>  {
>         pgd_t *pgd;
>         phys_addr_t next;
> @@ -1513,7 +1519,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
>                         break;
>                 next = stage2_pgd_addr_end(kvm, addr, end);
>                 if (stage2_pgd_present(kvm, *pgd))
> -                       stage2_wp_puds(kvm, pgd, addr, next);
> +                       stage2_wp_puds(kvm, pgd, addr, next, wp_ptes);
>         } while (pgd++, addr = next, addr != end);
>  }
>
> @@ -1535,6 +1541,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
>         struct kvm_memslots *slots = kvm_memslots(kvm);
>         struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
>         phys_addr_t start, end;
> +       bool wp_ptes;
>
>         if (WARN_ON_ONCE(!memslot))
>                 return;
> @@ -1543,7 +1550,14 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
>         end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
>
>         spin_lock(&kvm->mmu_lock);
> -       stage2_wp_range(kvm, start, end);
> +       /*
> +        * If we're with initial-all-set, we don't need to write protect
> +        * any small page because they're reported as dirty already.
> +        * However we still need to write-protect huge pages so that the
> +        * page split can happen lazily on the first write to the huge page.
> +        */
> +       wp_ptes = !kvm_dirty_log_manual_protect_and_init_set(kvm);
> +       stage2_wp_range(kvm, start, end, wp_ptes);
>         spin_unlock(&kvm->mmu_lock);
>         kvm_flush_remote_tlbs(kvm);
>  }
>
> @@ -1567,7 +1581,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>         phys_addr_t start = (base_gfn +  __ffs(mask)) << PAGE_SHIFT;
>         phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
>
> -       stage2_wp_range(kvm, start, end);
> +       stage2_wp_range(kvm, start, end, true);
>  }
>
>  /*
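To spell out what the new comment in kvm_mmu_wp_memory_region() relies
on: with KVM_DIRTY_LOG_INITIALLY_SET, every page starts out reported as
dirty, and it is the first KVM_CLEAR_DIRTY_LOG on each chunk that
actually write-protects the small pages. A rough sketch of that
userspace side (slot_id, first_page and the mask value are illustrative):

/* Sketch: clearing one 64-page chunk of the dirty bitmap is what
 * write-protects those pages for the first time.
 */
__u64 mask = ~0ULL;			/* all 64 pages in the chunk */
struct kvm_clear_dirty_log clear = {
	.slot         = slot_id,
	.first_page   = first_page,	/* must be a multiple of 64 */
	.num_pages    = 64,
	.dirty_bitmap = &mask,
};

ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);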

Thanks,

        M.
--
Jazz is not dead. It just smells funny...