On Wed, Jun 10, 2009 at 07:23:25PM +0300, Izik Eidus wrote:
> change the dirty page tracking to work with dirty bity instead of page fault.
> right now the dirty page tracking work with the help of page faults, when we
> want to track a page for being dirty, we write protect it and we mark it dirty
> when we have write page fault, this code move into looking at the dirty bit
> of the spte.
>
> Signed-off-by: Izik Eidus <[email protected]>
> ---
> arch/ia64/kvm/kvm-ia64.c | 4 +++
> arch/powerpc/kvm/powerpc.c | 4 +++
> arch/s390/kvm/kvm-s390.c | 4 +++
> arch/x86/include/asm/kvm_host.h | 3 ++
> arch/x86/kvm/mmu.c | 42 ++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/svm.c | 7 ++++++
> arch/x86/kvm/vmx.c | 7 ++++++
> arch/x86/kvm/x86.c | 26 ++++++++++++++++++++---
> include/linux/kvm_host.h | 1 +
> virt/kvm/kvm_main.c | 6 ++++-
> 10 files changed, 96 insertions(+), 8 deletions(-)
>
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 3199221..5914128 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1809,6 +1809,10 @@ void kvm_arch_exit(void)
> kvm_vmm_info = NULL;
> }
>
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +
> static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
> struct kvm_dirty_log *log)
> {
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 2cf915e..6beb368 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -418,6 +418,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct
> kvm_dirty_log *log)
> return -ENOTSUPP;
> }
>
#ifndef KVM_ARCH_HAVE_DIRTY_LOG
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +
#endif
in virt/kvm/main.c
> index c7b0cc2..8a24149 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -527,6 +527,7 @@ struct kvm_x86_ops {
> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> int (*get_tdp_level)(void);
> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> + int (*dirty_bit_support)(void);
> };
>
> extern struct kvm_x86_ops *kvm_x86_ops;
> @@ -796,4 +797,6 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
> int kvm_age_hva(struct kvm *kvm, unsigned long hva);
> int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp);
> +
> #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 809cce0..500e0e2 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -140,6 +140,8 @@ module_param(oos_shadow, bool, 0644);
> #define ACC_USER_MASK PT_USER_MASK
> #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
>
> +#define SPTE_DONT_DIRTY (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
> +
> #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
>
> struct kvm_rmap_desc {
> @@ -629,6 +631,29 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long
> *rmapp, u64 *spte)
> return NULL;
> }
>
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp)
> +{
> + u64 *spte;
> + int dirty = 0;
> +
> + if (!shadow_dirty_mask)
> + return 0;
> +
> + spte = rmap_next(kvm, rmapp, NULL);
> + while (spte) {
> + if (*spte & PT_DIRTY_MASK) {
> + set_shadow_pte(spte, (*spte &= ~PT_DIRTY_MASK) |
> + SPTE_DONT_DIRTY);
> + dirty = 1;
> + break;
> + }
> + spte = rmap_next(kvm, rmapp, spte);
> + }
> +
> + return dirty;
> +}
> +
> +
> static int rmap_write_protect(struct kvm *kvm, u64 gfn)
> {
> unsigned long *rmapp;
> @@ -1381,11 +1406,17 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
> static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> int ret;
> + int i;
> +
> ++kvm->stat.mmu_shadow_zapped;
> ret = mmu_zap_unsync_children(kvm, sp);
> kvm_mmu_page_unlink_children(kvm, sp);
> kvm_mmu_unlink_parents(kvm, sp);
> kvm_flush_remote_tlbs(kvm);
> + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
> + if (sp->spt[i] & PT_DIRTY_MASK)
> + mark_page_dirty(kvm, sp->gfns[i]);
> + }
Also need to transfer dirty bit in other places probably.
> if (!sp->role.invalid && !sp->role.direct)
> unaccount_shadowed(kvm, sp->gfn);
> if (sp->unsync)
> @@ -1676,7 +1707,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64
> *shadow_pte,
> * whether the guest actually used the pte (in order to detect
> * demand paging).
> */
> - spte = shadow_base_present_pte | shadow_dirty_mask;
> + spte = shadow_base_present_pte;
> + if (!(spte & SPTE_DONT_DIRTY))
> + spte |= shadow_dirty_mask;
> +
> if (!speculative)
> spte |= shadow_accessed_mask;
> if (!dirty)
> @@ -1725,8 +1759,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64
> *shadow_pte,
> }
> }
>
> - if (pte_access & ACC_WRITE_MASK)
> - mark_page_dirty(vcpu->kvm, gfn);
> + if (!shadow_dirty_mask) {
> + if (pte_access & ACC_WRITE_MASK)
> + mark_page_dirty(vcpu->kvm, gfn);
> + }
You can avoid the mark_page_dirty if SPTE_DONT_DIRTY? (which is a good idea,
gfn_to_memslot_unaliased and friends show up high in profiling).
> set_pte:
> set_shadow_pte(shadow_pte, spte);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 37397f6..5b6351e 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2724,6 +2724,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu,
> gfn_t gfn, bool is_mmio)
> return 0;
> }
>
> +static int svm_dirty_bit_support(void)
> +{
> + return 1;
> +}
> +
> static struct kvm_x86_ops svm_x86_ops = {
> .cpu_has_kvm_support = has_svm,
> .disabled_by_bios = is_disabled,
> @@ -2785,6 +2790,8 @@ static struct kvm_x86_ops svm_x86_ops = {
> .set_tss_addr = svm_set_tss_addr,
> .get_tdp_level = get_npt_level,
> .get_mt_mask = svm_get_mt_mask,
> +
> + .dirty_bit_support = svm_dirty_bit_support,
> };
>
> static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 673bcb3..8903314 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3774,6 +3774,11 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu,
> gfn_t gfn, bool is_mmio)
> return ret;
> }
>
> +static int vmx_dirty_bit_support(void)
> +{
> + return false;
> +}
> +
> static struct kvm_x86_ops vmx_x86_ops = {
> .cpu_has_kvm_support = cpu_has_kvm_support,
> .disabled_by_bios = vmx_disabled_by_bios,
> @@ -3833,6 +3838,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .set_tss_addr = vmx_set_tss_addr,
> .get_tdp_level = get_ept_level,
> .get_mt_mask = vmx_get_mt_mask,
> +
> + .dirty_bit_support = vmx_dirty_bit_support,
> };
>
> static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 272e2e8..e06387c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1963,6 +1963,20 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
> return 0;
> }
>
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> + int i;
> +
> + spin_lock(&kvm->mmu_lock);
> + for (i = 0; i < memslot->npages; ++i) {
> + if (!test_bit(i, memslot->dirty_bitmap)) {
> + if (is_dirty_and_clean_rmapp(kvm, &memslot->rmap[i]))
> + set_bit(i, memslot->dirty_bitmap);
> + }
> + }
> + spin_unlock(&kvm->mmu_lock);
> +}
> +
> /*
> * Get (and clear) the dirty memory log for a memory slot.
> */
> @@ -1982,10 +1996,14 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>
> /* If nothing is dirty, don't bother messing with page tables. */
> if (is_dirty) {
> - spin_lock(&kvm->mmu_lock);
> - kvm_mmu_slot_remove_write_access(kvm, log->slot);
> - spin_unlock(&kvm->mmu_lock);
> - kvm_flush_remote_tlbs(kvm);
> + if (!kvm_x86_ops->dirty_bit_support()) {
> + spin_lock(&kvm->mmu_lock);
> + /* remove_write_access() flush the tlb */
> + kvm_mmu_slot_remove_write_access(kvm, log->slot);
> + spin_unlock(&kvm->mmu_lock);
> + } else {
> + kvm_flush_remote_tlbs(kvm);
> + }
> memslot = &kvm->memslots[log->slot];
> n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
> memset(memslot->dirty_bitmap, 0, n);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index cdf1279..d1657a3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -250,6 +250,7 @@ int kvm_dev_ioctl_check_extension(long ext);
>
> int kvm_get_dirty_log(struct kvm *kvm,
> struct kvm_dirty_log *log, int *is_dirty);
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot
> *memslot);
> int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> struct kvm_dirty_log *log);
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 3046e9c..a876231 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1135,8 +1135,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
> }
>
> /* Free page dirty bitmap if unneeded */
> - if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
> + if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) {
> new.dirty_bitmap = NULL;
> + if (old.flags & KVM_MEM_LOG_DIRTY_PAGES)
> + kvm_arch_flush_shadow(kvm);
> + }
Whats this for?
The idea of making dirty bit accessible (also can use it to map host
ptes read-only, when guest fault is read-only, for KSM (*)) is good. But
better first introduce infrastructure to handle dirty bit (making sure
the bit is transferred properly), then have logging make use of it.
* EPT violations do no transfer fault information to the page fault
handler, but its available (there's a vm-exit field).
> r = -ENOMEM;
>
> @@ -1279,6 +1282,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
> if (!memslot->dirty_bitmap)
> goto out;
>
> + kvm_arch_get_dirty_log(kvm, memslot);
> n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>
> for (i = 0; !any && i < n/sizeof(long); ++i)
> --
> 1.5.6.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html