Re: [PATCH 2/2] kvm: change the dirty page tracking to work with dirty bity

Marcelo Tosatti Wed, 10 Jun 2009 13:45:22 -0700

On Wed, Jun 10, 2009 at 07:23:25PM +0300, Izik Eidus wrote:
> change the dirty page tracking to work with dirty bity instead of page fault.
> right now the dirty page tracking work with the help of page faults, when we
> want to track a page for being dirty, we write protect it and we mark it dirty
> when we have write page fault, this code move into looking at the dirty bit
> of the spte.
> 
> Signed-off-by: Izik Eidus <[email protected]>
> ---
>  arch/ia64/kvm/kvm-ia64.c        |    4 +++
>  arch/powerpc/kvm/powerpc.c      |    4 +++
>  arch/s390/kvm/kvm-s390.c        |    4 +++
>  arch/x86/include/asm/kvm_host.h |    3 ++
>  arch/x86/kvm/mmu.c              |   42 ++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm.c              |    7 ++++++
>  arch/x86/kvm/vmx.c              |    7 ++++++
>  arch/x86/kvm/x86.c              |   26 ++++++++++++++++++++---
>  include/linux/kvm_host.h        |    1 +
>  virt/kvm/kvm_main.c             |    6 ++++-
>  10 files changed, 96 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 3199221..5914128 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1809,6 +1809,10 @@ void kvm_arch_exit(void)
>       kvm_vmm_info = NULL;
>  }
>  
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +
>  static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
>               struct kvm_dirty_log *log)
>  {
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 2cf915e..6beb368 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -418,6 +418,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
> kvm_dirty_log *log)
>       return -ENOTSUPP;
>  }


>  

#ifndef KVM_ARCH_HAVE_DIRTY_LOG
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +
#endif

in virt/kvm/main.c


> index c7b0cc2..8a24149 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -527,6 +527,7 @@ struct kvm_x86_ops {
>       int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>       int (*get_tdp_level)(void);
>       u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> +     int (*dirty_bit_support)(void);
>  };
>  
>  extern struct kvm_x86_ops *kvm_x86_ops;
> @@ -796,4 +797,6 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>  
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 809cce0..500e0e2 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -140,6 +140,8 @@ module_param(oos_shadow, bool, 0644);
>  #define ACC_USER_MASK    PT_USER_MASK
>  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
>  
> +#define SPTE_DONT_DIRTY (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
> +
>  #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
>  
>  struct kvm_rmap_desc {
> @@ -629,6 +631,29 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long 
> *rmapp, u64 *spte)
>       return NULL;
>  }
>  
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp)
> +{
> +     u64 *spte;
> +     int dirty = 0;
> +
> +     if (!shadow_dirty_mask)
> +             return 0;
> +
> +     spte = rmap_next(kvm, rmapp, NULL);
> +     while (spte) {
> +             if (*spte & PT_DIRTY_MASK) {
> +                     set_shadow_pte(spte, (*spte &= ~PT_DIRTY_MASK) |
> +                                    SPTE_DONT_DIRTY);
> +                     dirty = 1;
> +                     break;
> +             }
> +             spte = rmap_next(kvm, rmapp, spte);
> +     }
> +
> +     return dirty;
> +}
> +
> +
>  static int rmap_write_protect(struct kvm *kvm, u64 gfn)
>  {
>       unsigned long *rmapp;
> @@ -1381,11 +1406,17 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>  static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>       int ret;
> +     int i;
> +
>       ++kvm->stat.mmu_shadow_zapped;
>       ret = mmu_zap_unsync_children(kvm, sp);
>       kvm_mmu_page_unlink_children(kvm, sp);
>       kvm_mmu_unlink_parents(kvm, sp);
>       kvm_flush_remote_tlbs(kvm);
> +     for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
> +             if (sp->spt[i] & PT_DIRTY_MASK)
> +                     mark_page_dirty(kvm, sp->gfns[i]);
> +     }

Also need to transfer dirty bit in other places probably.

>       if (!sp->role.invalid && !sp->role.direct)
>               unaccount_shadowed(kvm, sp->gfn);
>       if (sp->unsync)
> @@ -1676,7 +1707,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 
> *shadow_pte,
>        * whether the guest actually used the pte (in order to detect
>        * demand paging).
>        */
> -     spte = shadow_base_present_pte | shadow_dirty_mask;
> +     spte = shadow_base_present_pte;
> +     if (!(spte & SPTE_DONT_DIRTY))
> +             spte |= shadow_dirty_mask;
> +
>       if (!speculative)
>               spte |= shadow_accessed_mask;
>       if (!dirty)
> @@ -1725,8 +1759,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 
> *shadow_pte,
>               }
>       }
>  
> -     if (pte_access & ACC_WRITE_MASK)
> -             mark_page_dirty(vcpu->kvm, gfn);
> +     if (!shadow_dirty_mask) {
> +             if (pte_access & ACC_WRITE_MASK)
> +                     mark_page_dirty(vcpu->kvm, gfn);
> +     }

You can avoid the mark_page_dirty if SPTE_DONT_DIRTY? (which is a good idea,
gfn_to_memslot_unaliased and friends show up high in profiling).

>  set_pte:
>       set_shadow_pte(shadow_pte, spte);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 37397f6..5b6351e 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2724,6 +2724,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, 
> gfn_t gfn, bool is_mmio)
>       return 0;
>  }
>  
> +static int svm_dirty_bit_support(void)
> +{
> +     return 1;
> +}
> +
>  static struct kvm_x86_ops svm_x86_ops = {
>       .cpu_has_kvm_support = has_svm,
>       .disabled_by_bios = is_disabled,
> @@ -2785,6 +2790,8 @@ static struct kvm_x86_ops svm_x86_ops = {
>       .set_tss_addr = svm_set_tss_addr,
>       .get_tdp_level = get_npt_level,
>       .get_mt_mask = svm_get_mt_mask,
> +
> +     .dirty_bit_support = svm_dirty_bit_support,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 673bcb3..8903314 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3774,6 +3774,11 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, 
> gfn_t gfn, bool is_mmio)
>       return ret;
>  }
>  
> +static int vmx_dirty_bit_support(void)
> +{
> +     return false;
> +}
> +
>  static struct kvm_x86_ops vmx_x86_ops = {
>       .cpu_has_kvm_support = cpu_has_kvm_support,
>       .disabled_by_bios = vmx_disabled_by_bios,
> @@ -3833,6 +3838,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>       .set_tss_addr = vmx_set_tss_addr,
>       .get_tdp_level = get_ept_level,
>       .get_mt_mask = vmx_get_mt_mask,
> +
> +     .dirty_bit_support = vmx_dirty_bit_support,
>  };
>  
>  static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 272e2e8..e06387c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1963,6 +1963,20 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
>       return 0;
>  }
>  
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +     int i;
> +
> +     spin_lock(&kvm->mmu_lock);
> +     for (i = 0; i < memslot->npages; ++i) {
> +             if (!test_bit(i, memslot->dirty_bitmap)) {
> +                     if (is_dirty_and_clean_rmapp(kvm, &memslot->rmap[i]))
> +                             set_bit(i, memslot->dirty_bitmap);
> +             }
> +     }
> +     spin_unlock(&kvm->mmu_lock);
> +}
> +
>  /*
>   * Get (and clear) the dirty memory log for a memory slot.
>   */
> @@ -1982,10 +1996,14 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>  
>       /* If nothing is dirty, don't bother messing with page tables. */
>       if (is_dirty) {
> -             spin_lock(&kvm->mmu_lock);
> -             kvm_mmu_slot_remove_write_access(kvm, log->slot);
> -             spin_unlock(&kvm->mmu_lock);
> -             kvm_flush_remote_tlbs(kvm);
> +             if (!kvm_x86_ops->dirty_bit_support()) {
> +                     spin_lock(&kvm->mmu_lock);
> +                     /*  remove_write_access() flush the tlb */
> +                     kvm_mmu_slot_remove_write_access(kvm, log->slot);
> +                     spin_unlock(&kvm->mmu_lock);
> +             } else {
> +                     kvm_flush_remote_tlbs(kvm);
> +             }
>               memslot = &kvm->memslots[log->slot];
>               n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>               memset(memslot->dirty_bitmap, 0, n);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index cdf1279..d1657a3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -250,6 +250,7 @@ int kvm_dev_ioctl_check_extension(long ext);
>  
>  int kvm_get_dirty_log(struct kvm *kvm,
>                       struct kvm_dirty_log *log, int *is_dirty);
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot 
> *memslot);
>  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>                               struct kvm_dirty_log *log);
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 3046e9c..a876231 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1135,8 +1135,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
>       }
>  
>       /* Free page dirty bitmap if unneeded */
> -     if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
> +     if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) {
>               new.dirty_bitmap = NULL;
> +             if (old.flags & KVM_MEM_LOG_DIRTY_PAGES)
> +                     kvm_arch_flush_shadow(kvm);
> +     }

Whats this for?

The idea of making dirty bit accessible (also can use it to map host
ptes read-only, when guest fault is read-only, for KSM (*)) is good. But
better first introduce infrastructure to handle dirty bit (making sure
the bit is transferred properly), then have logging make use of it.

* EPT violations do no transfer fault information to the page fault
handler, but its available (there's a vm-exit field).

>       r = -ENOMEM;
>  
> @@ -1279,6 +1282,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
>       if (!memslot->dirty_bitmap)
>               goto out;
>  
> +     kvm_arch_get_dirty_log(kvm, memslot);
>       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>  
>       for (i = 0; !any && i < n/sizeof(long); ++i)
> -- 
> 1.5.6.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] kvm: change the dirty page tracking to work with dirty bity

Reply via email to