On Fri, Jun 17, 2022 at 09:30:53PM +0000, Sean Christopherson wrote:
> On Thu, May 19, 2022, Chao Peng wrote:
> > @@ -4028,8 +4081,11 @@ static bool is_page_fault_stale(struct kvm_vcpu 
> > *vcpu,
> >     if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> >             return true;
> >  
> > -   return fault->slot &&
> > -          mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> > +   if (fault->is_private)
> > +           return mmu_notifier_retry(vcpu->kvm, mmu_seq);
> 
> Hmm, this is somewhat undesirable, because faulting in private pfns will be
> blocked by unrelated mmu_notifier updates.  The issue is mitigated to some
> degree by bumping the sequence count if and only if overlap with a memslot is
> detected, e.g. mapping changes that affect only userspace won't block the
> guest.
> 
> It probably won't be an issue, but at the same time it's easy to solve, and I
> don't like piggybacking mmu_notifier_seq as private mappings shouldn't be
> subject to the mmu_notifier.
> 
> That would also fix a theoretical bug in this patch where mmu_notifier_retry()
> wouldn't be defined if CONFIG_MEMFILE_NOTIFIER=y && CONFIG_MMU_NOTIFIER=n.

Agreed, thanks.

> 
> ---
>  arch/x86/kvm/mmu/mmu.c   | 11 ++++++-----
>  include/linux/kvm_host.h | 16 +++++++++++-----
>  virt/kvm/kvm_main.c      |  2 +-
>  3 files changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 0b455c16ec64..a4cbd29433e7 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4100,10 +4100,10 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
>               return true;
> 
>       if (fault->is_private)
> -             return mmu_notifier_retry(vcpu->kvm, mmu_seq);
> -     else
> -             return fault->slot &&
> -                     mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> +             return memfile_notifier_retry(vcpu->kvm, mmu_seq);
> +
> +     return fault->slot &&
> +            mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
>  }
> 
>  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> *fault)
> @@ -4127,7 +4127,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, 
> struct kvm_page_fault *fault
>       if (r)
>               return r;
> 
> -     mmu_seq = vcpu->kvm->mmu_notifier_seq;
> +     mmu_seq = fault->is_private ? vcpu->kvm->memfile_notifier_seq :
> +                                   vcpu->kvm->mmu_notifier_seq;
>       smp_rmb();
> 
>       r = kvm_faultin_pfn(vcpu, fault);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 92afa5bddbc5..31f704c83099 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -773,16 +773,15 @@ struct kvm {
>       struct hlist_head irq_ack_notifier_list;
>  #endif
> 
> -#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) ||\
> -     defined(CONFIG_MEMFILE_NOTIFIER)
> +#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
>       unsigned long mmu_notifier_seq;
> -#endif
> -
> -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>       struct mmu_notifier mmu_notifier;
>       long mmu_notifier_count;
>       unsigned long mmu_notifier_range_start;
>       unsigned long mmu_notifier_range_end;
> +#endif
> +#ifdef CONFIG_MEMFILE_NOTIFIER
> +     unsigned long memfile_notifier_seq;
>  #endif
>       struct list_head devices;
>       u64 manual_dirty_log_protect;
> @@ -1964,6 +1963,13 @@ static inline int mmu_notifier_retry_hva(struct kvm 
> *kvm,
>  }
>  #endif
> 
> +#ifdef CONFIG_MEMFILE_NOTIFIER
> +static inline bool memfile_notifier_retry(struct kvm *kvm, unsigned long 
> mmu_seq)
> +{
> +     return kvm->memfile_notifier_seq != mmu_seq;
> +}
> +#endif
> +
>  #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
> 
>  #define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future 
> */
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 2b416d3bd60e..e6d34c964d51 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -898,7 +898,7 @@ static void kvm_private_mem_notifier_handler(struct 
> memfile_notifier *notifier,
>       KVM_MMU_LOCK(kvm);
>       if (kvm_unmap_gfn_range(kvm, &gfn_range))
>               kvm_flush_remote_tlbs(kvm);
> -     kvm->mmu_notifier_seq++;
> +     kvm->memfile_notifier_seq++;
>       KVM_MMU_UNLOCK(kvm);
>       srcu_read_unlock(&kvm->srcu, idx);
>  }
> 
> base-commit: 333ef501c7f6c6d4ef2b7678905cad0f8ef3e271
> --
> 
> > +   else
> > +           return fault->slot &&
> > +                   mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> >  }
> >  
> >  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> > *fault)
> > @@ -4088,7 +4144,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, 
> > struct kvm_page_fault *fault
> >             read_unlock(&vcpu->kvm->mmu_lock);
> >     else
> >             write_unlock(&vcpu->kvm->mmu_lock);
> > -   kvm_release_pfn_clean(fault->pfn);
> > +
> > +   if (fault->is_private)
> > +           kvm_private_mem_put_pfn(fault->slot, fault->pfn);
> 
> Why does the shmem path lock the page, and then unlock it here?

Initially this was to prevent a race between SLPT population and
truncate/punch on the fd. Without this, a gfn may become stale before
the page is populated in the SLPT. However, with the
memfile_notifier_retry mechanism, this no longer seems to be needed.

> 
> Same question for why this path marks it dirty?  The guest has the page mapped
> so the dirty flag is immediately stale.

I believe so.

> 
> In other words, why does KVM need to do something different for private pfns?

These two are inherited from Kirill's previous code. Let's see if he has
any comments.

> 
> > +   else
> > +           kvm_release_pfn_clean(fault->pfn);
> > +
> >     return r;
> >  }
> >  
> 
> ...
> 
> > diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> > index 7f8f1c8dbed2..1d857919a947 100644
> > --- a/arch/x86/kvm/mmu/paging_tmpl.h
> > +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> > @@ -878,7 +878,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, 
> > struct kvm_page_fault *fault
> >  
> >  out_unlock:
> >     write_unlock(&vcpu->kvm->mmu_lock);
> > -   kvm_release_pfn_clean(fault->pfn);
> > +   if (fault->is_private)
> 
> Indirect MMUs can't support private faults, i.e. this is unnecessary.

Okay.

> 
> > +           kvm_private_mem_put_pfn(fault->slot, fault->pfn);
> > +   else
> > +           kvm_release_pfn_clean(fault->pfn);
> >     return r;
> >  }
> >  
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 3fd168972ecd..b0a7910505ed 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2241,4 +2241,26 @@ static inline void kvm_handle_signal_exit(struct 
> > kvm_vcpu *vcpu)
> >  /* Max number of entries allowed for each kvm dirty ring */
> >  #define  KVM_DIRTY_RING_MAX_ENTRIES  65536
> >  
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +static inline int kvm_private_mem_get_pfn(struct kvm_memory_slot *slot,
> > +                                     gfn_t gfn, kvm_pfn_t *pfn, int *order)
> > +{
> > +   int ret;
> > +   pfn_t pfnt;
> > +   pgoff_t index = gfn - slot->base_gfn +
> > +                   (slot->private_offset >> PAGE_SHIFT);
> > +
> > +   ret = slot->notifier.bs->get_lock_pfn(slot->private_file, index, &pfnt,
> > +                                           order);
> > +   *pfn = pfn_t_to_pfn(pfnt);
> > +   return ret;
> > +}
> > +
> > +static inline void kvm_private_mem_put_pfn(struct kvm_memory_slot *slot,
> > +                                      kvm_pfn_t pfn)
> > +{
> > +   slot->notifier.bs->put_unlock_pfn(pfn_to_pfn_t(pfn));
> > +}
> > +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> > +
> >  #endif
> > -- 
> > 2.25.1
> > 

Reply via email to