Add support for initial write protection of the guest VM, so that dirty pages
can be tracked later. TLB flushing is reduced to a single flush, issued after
the memory region has been write protected.

This is based on Eric's patch, which applied cleanly. The only patch I found
in the archives was the memory region delete one, but still in arm.c.

Signed-off-by: Mario Smarduch <m.smard...@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    1 +
 arch/arm/kvm/arm.c              |    4 ++
 arch/arm/kvm/mmu.c              |  125 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+)
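
For context, a rough sketch of the userspace side that triggers this path: the
migration thread enables dirty logging on a slot with KVM_SET_USER_MEMORY_REGION
and the KVM_MEM_LOG_DIRTY_PAGES flag, which lands in
kvm_arch_commit_memory_region() below and now write protects the slot. This is
illustrative only and not part of the patch; the helper name and arguments are
made up:

    /* Illustrative userspace sketch, not part of this patch. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int enable_dirty_log(int vm_fd, __u32 slot, __u64 gpa,
                                __u64 size, __u64 uaddr)
    {
            struct kvm_userspace_memory_region mem = {
                    .slot            = slot,
                    .flags           = KVM_MEM_LOG_DIRTY_PAGES,
                    .guest_phys_addr = gpa,
                    .memory_size     = size,
                    .userspace_addr  = uaddr,
            };

            /* Setting the flag on an existing slot reaches
             * kvm_arch_commit_memory_region(), which write protects it.
             */
            return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
    }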

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 315e3f3..7ac1fdc 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -229,5 +229,6 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 void kvm_tlb_flush_vm(struct kvm *kvm);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9a4bc10..7714cc6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -249,6 +249,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                unmap_stage2_range(kvm, gpa, size);
                spin_unlock(&kvm->mmu_lock);
        }
+
+       /* Guest migration has been requested; write protect the VM first */
+       if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d7a1846..b85ab56 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -648,6 +648,131 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
        return false;
 }
 
+/**
+ * split_pmd - split a huge page into small pages, needed to keep a dirty
+ *  log at small page granularity; otherwise whole huge pages would have to
+ *  be migrated, and in practice an idle system has trouble migrating with
+ *  huge pages.  Called while write protecting the entire VM address space,
+ *  initially when the migration thread sets KVM_MEM_LOG_DIRTY_PAGES on the
+ *  memory region.  The caller must hold mmu_lock.
+ *
+ * @kvm:        The KVM pointer
+ * @pmd:        pmd of the 2nd stage huge page
+ * @addr:       Guest Physical Address
+ */
+static bool split_pmd(struct kvm *kvm, pmd_t *pmd, u64 addr)
+{
+       struct page *page;
+       pfn_t pfn = pmd_pfn(*pmd);
+       pte_t *pte, new_pte;
+       int i;
+
+       /* mmu_lock (a spinlock) is held by the caller, so do not sleep */
+       page = alloc_page(GFP_ATOMIC);
+       if (page == NULL)
+               return false;
+
+       pte = page_address(page);
+       /* first break the huge page up into small page ptes */
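+       /* PAGE_S2 is stage 2 read-only, so the new ptes come up write
+        * protected as well
+        */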
+       for (i = 0; i < PTRS_PER_PMD; i++) {
+               new_pte = pfn_pte(pfn+i, PAGE_S2);
+               pte[i] = new_pte;
+       }
+       kvm_clean_pte(pte);
+       /* now point the pmd at the new pte table */
+       pmd_populate_kernel(NULL, pmd, pte);
+
+       get_page(virt_to_page(pte));
+       return true;
+}
+
+/**
+ * kvm_mmu_slot_remove_write_access - write protects the entire VM address
+ *  space.  Called at the start of migration, when KVM_MEM_LOG_DIRTY_PAGES
+ *  is set on the memory region.  After this function returns, all pages are
+ *  write protected except those faulted in while mmu_lock is released; such
+ *  pages are marked in the dirty log and are not forgotten.
+ *
+ *  The initial write protect sweep of the VM is required to keep track of
+ *  dirty pages for subsequent memory region dirty log retrieval.
+ *  - mmu_lock is held throughout, to protect against concurrent faults, mmu
+ *    notifier invalidate/unmap/update of user ptes, and direct device writes
+ *    to guest memory
+ *
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot the dirty log is retrieved for
+ */
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte, new_pte;
+       pgd_t *pgdp = kvm->arch.pgd;
+       struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+       u64 start = memslot->base_gfn << PAGE_SHIFT;
+       u64 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+       u64 addr = start;
+
+       spin_lock(&kvm->mmu_lock);
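+       /* Set to -1 below if a pmd split fails, so that the failure can be
+        * reported on the first dirty log retrieval.
+        */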
+       kvm->arch.migration_in_progress = 1;
+       while (addr < end) {
+               /* Relieve contention on mmu_lock.  There is no need to flush
+                * TLBs here; TLB updates are picked up on TLB refills or on a
+                * flush of the VM TLBs.  What matters is that once the loop
+                * terminates, all pmds are split, write protected and visible.
+                */
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+                       cond_resched_lock(&kvm->mmu_lock);
+
+               pgd = pgdp + pgd_index(addr);
+               if (!pgd_present(*pgd)) {
+                       addr = pgd_addr_end(addr, end);
+                       continue;
+               }
+
+               pud = pud_offset(pgd, addr);
+               if (pud_huge(*pud) || !pud_present(*pud)) {
+                       addr = pud_addr_end(addr, end);
+                       continue;
+               }
+
+               pmd = pmd_offset(pud, addr);
+               if (!pmd_present(*pmd)) {
+                       addr = pmd_addr_end(addr, end);
+                       continue;
+               }
+               if (kvm_pmd_huge(*pmd)) {
+                       if (!split_pmd(kvm, pmd, addr)) {
+                               /* Migration should fail here, but a return
+                                * from this point is not visible to user
+                                * space.  The failure is detected on the
+                                * first dirty log retrieval, which makes
+                                * user space abort the migration.
+                                */
+                               kvm->arch.migration_in_progress = -1;
+                               spin_unlock(&kvm->mmu_lock);
+                               return;
+                       }
+                       addr = pmd_addr_end(addr, end);
+                       continue;
+               }
+               pte = pte_offset_kernel(pmd, addr);
+               addr += PAGE_SIZE;
+               if (!pte_present(*pte))
+                       continue;
+
+               /* Skip write protected or read only pages */
+               if ((*pte & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY)
+                       continue;
+
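+               /* PAGE_S2 grants only stage 2 read access, so installing a
+                * fresh pte built from it clears the write permission
+                */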
+               new_pte = pfn_pte(pte_pfn(*pte), PAGE_S2);
+               *pte = new_pte;
+       }
+       /* Flush VM TLBs */
+       kvm_tlb_flush_vm(kvm);
+       spin_unlock(&kvm->mmu_lock);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
-- 
1.7.9.5
