Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-24 Thread Nicholas Piggin
Excerpts from David Stevens's message of June 24, 2021 1:57 pm:
> From: David Stevens 
>  out_unlock:
>   if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
>   read_unlock(>kvm->mmu_lock);
>   else
>   write_unlock(>kvm->mmu_lock);
> - kvm_release_pfn_clean(pfn);
> + if (pfnpg.page)
> + put_page(pfnpg.page);
>   return r;
>  }

How about

  kvm_release_pfn_page_clean(pfnpg);

Thanks,
Nick


Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-24 Thread Nicholas Piggin
Excerpts from Marc Zyngier's message of June 24, 2021 8:06 pm:
> On Thu, 24 Jun 2021 09:58:00 +0100,
> Nicholas Piggin  wrote:
>> 
>> Excerpts from David Stevens's message of June 24, 2021 1:57 pm:
>> > From: David Stevens 
>> >  out_unlock:
>> >if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
>> >read_unlock(>kvm->mmu_lock);
>> >else
>> >write_unlock(>kvm->mmu_lock);
>> > -  kvm_release_pfn_clean(pfn);
>> > +  if (pfnpg.page)
>> > +  put_page(pfnpg.page);
>> >return r;
>> >  }
>> 
>> How about
>> 
>>   kvm_release_pfn_page_clean(pfnpg);
> 
> I'm not sure. I always found kvm_release_pfn_clean() ugly, because it
> doesn't mark the page 'clean'. I find put_page() more correct.
> 
> Something like 'kvm_put_pfn_page()' would make more sense, but I'm so
> bad at naming things that I could just as well call it 'bob()'.

That seems like a fine name to me. A little better than bob.

Thanks,
Nick


Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-24 Thread Paolo Bonzini

On 24/06/21 12:06, Marc Zyngier wrote:

On Thu, 24 Jun 2021 09:58:00 +0100,
Nicholas Piggin  wrote:


Excerpts from David Stevens's message of June 24, 2021 1:57 pm:

From: David Stevens 
  out_unlock:
if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
read_unlock(>kvm->mmu_lock);
else
write_unlock(>kvm->mmu_lock);
-   kvm_release_pfn_clean(pfn);
+   if (pfnpg.page)
+   put_page(pfnpg.page);
return r;
  }


How about

   kvm_release_pfn_page_clean(pfnpg);


I'm not sure. I always found kvm_release_pfn_clean() ugly, because it
doesn't mark the page 'clean'. I find put_page() more correct.

Something like 'kvm_put_pfn_page()' would make more sense, but I'm so
bad at naming things that I could just as well call it 'bob()'.


The best way to go would be to get rid of kvm_release_pfn_clean() and 
always go through a pfn_page.  Then we could or could not introduce 
wrappers kvm_put_pfn_page{,_dirty}.


I think for now it's best to limit the churn since these patches will go 
in the stable releases too, and clean up the resulting API once we have 
a clear idea of how all architectures are using kvm_pfn_page.


Paolo



Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-24 Thread Marc Zyngier
On Thu, 24 Jun 2021 09:58:00 +0100,
Nicholas Piggin  wrote:
> 
> Excerpts from David Stevens's message of June 24, 2021 1:57 pm:
> > From: David Stevens 
> >  out_unlock:
> > if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
> > read_unlock(>kvm->mmu_lock);
> > else
> > write_unlock(>kvm->mmu_lock);
> > -   kvm_release_pfn_clean(pfn);
> > +   if (pfnpg.page)
> > +   put_page(pfnpg.page);
> > return r;
> >  }
> 
> How about
> 
>   kvm_release_pfn_page_clean(pfnpg);

I'm not sure. I always found kvm_release_pfn_clean() ugly, because it
doesn't mark the page 'clean'. I find put_page() more correct.

Something like 'kvm_put_pfn_page()' would make more sense, but I'm so
bad at naming things that I could just as well call it 'bob()'.

M.

-- 
Without deviation from the norm, progress is not possible.


Re: [PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-24 Thread Paolo Bonzini

On 24/06/21 05:57, David Stevens wrote:

  static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
-   int map_writable, int max_level, kvm_pfn_t pfn,
+   int map_writable, int max_level,
+   const struct kvm_pfn_page *pfnpg,
bool prefault, bool is_tdp)
  {
bool nx_huge_page_workaround_enabled = is_nx_huge_pa


So the main difference with my tentative patches is that here I was 
passing the struct by value.  I'll try to clean this up for 5.15, but 
for now it's all good like this.  Thanks again!


Paolo



[PATCH 3/6] KVM: x86/mmu: avoid struct page in MMU

2021-06-23 Thread David Stevens
From: David Stevens 

Avoid converting pfns returned by follow_fault_pfn to struct pages to
transiently take a reference. The reference was originally taken to
match the reference taken by gup. However, pfns returned by
follow_fault_pfn may not have a struct page set up for reference
counting.

Signed-off-by: David Stevens 
---
 arch/x86/kvm/mmu/mmu.c  | 56 +++--
 arch/x86/kvm/mmu/mmu_audit.c| 13 
 arch/x86/kvm/mmu/mmu_internal.h |  3 +-
 arch/x86/kvm/mmu/paging_tmpl.h  | 36 -
 arch/x86/kvm/mmu/tdp_mmu.c  |  7 +++--
 arch/x86/kvm/mmu/tdp_mmu.h  |  4 +--
 arch/x86/kvm/x86.c  |  9 +++---
 7 files changed, 73 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 84913677c404..8fa4a4a411ba 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2610,16 +2610,16 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 
*sptep,
return ret;
 }
 
-static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
-bool no_dirty_log)
+static struct kvm_pfn_page pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu,
+  gfn_t gfn, bool no_dirty_log)
 {
struct kvm_memory_slot *slot;
 
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
if (!slot)
-   return KVM_PFN_ERR_FAULT;
+   return KVM_PFN_PAGE_ERR(KVM_PFN_ERR_FAULT);
 
-   return kvm_pfn_page_unwrap(gfn_to_pfn_memslot_atomic(slot, gfn));
+   return gfn_to_pfn_memslot_atomic(slot, gfn);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
@@ -2748,7 +2748,8 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 
 int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
int max_level, kvm_pfn_t *pfnp,
-   bool huge_page_disallowed, int *req_level)
+   struct page *page, bool huge_page_disallowed,
+   int *req_level)
 {
struct kvm_memory_slot *slot;
kvm_pfn_t pfn = *pfnp;
@@ -2760,6 +2761,9 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t 
gfn,
if (unlikely(max_level == PG_LEVEL_4K))
return PG_LEVEL_4K;
 
+   if (!page)
+   return PG_LEVEL_4K;
+
if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn))
return PG_LEVEL_4K;
 
@@ -2814,7 +2818,8 @@ void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int 
cur_level,
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
-   int map_writable, int max_level, kvm_pfn_t pfn,
+   int map_writable, int max_level,
+   const struct kvm_pfn_page *pfnpg,
bool prefault, bool is_tdp)
 {
bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
@@ -2826,11 +2831,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
int level, req_level, ret;
gfn_t gfn = gpa >> PAGE_SHIFT;
gfn_t base_gfn = gfn;
+   kvm_pfn_t pfn = pfnpg->pfn;
 
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
return RET_PF_RETRY;
 
-   level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, ,
+   level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, , pfnpg->page,
huge_page_disallowed, _level);
 
trace_kvm_mmu_spte_requested(gpa, level, pfn);
@@ -3672,8 +3678,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu 
*vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
-bool write, bool *writable)
+gpa_t cr2_or_gpa, struct kvm_pfn_page *pfnpg,
+hva_t *hva, bool write, bool *writable)
 {
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3688,17 +3694,16 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
 
/* Don't expose private memslots to L2. */
if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
-   *pfn = KVM_PFN_NOSLOT;
+   *pfnpg = KVM_PFN_PAGE_ERR(KVM_PFN_NOSLOT);
*writable = false;
return false;
}
 
async = false;
-   *pfn = kvm_pfn_page_unwrap(__gfn_to_pfn_memslot(slot, gfn, false,
-   , write,
-   writable, hva));
+   *pfnpg = __gfn_to_pfn_memslot(slot, gfn, false, ,
+ write, writable, hva);
if (!async)
-   return false; /* *pfn has correct page already */
+   return