Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2023-01-28 Thread Chao Peng
On Fri, Jan 13, 2023 at 11:16:27PM +, Sean Christopherson wrote:
> On Fri, Dec 02, 2022, Chao Peng wrote:
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 9a07380f8d3c..5aefcff614d2 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -12362,6 +12362,8 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
> > 	if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
> > linfo[lpages - 1].disallow_lpage = 1;
> > ugfn = slot->userspace_addr >> PAGE_SHIFT;
> > +   if (kvm_slot_can_be_private(slot))
> > +   ugfn |= slot->restricted_offset >> PAGE_SHIFT;
> > /*
> >  * If the gfn and userspace address are not aligned wrt each
> >  * other, disable large page support for this slot.
> 
> Forgot to talk about the bug.  This code needs to handle the scenario where a
> memslot is created with existing, non-uniform attributes.  It might be a bit ugly
> (I didn't even try to write the code), but it's definitely possible, and since
> memslot updates are already slow I think it's best to handle things here.
> 
> In the meantime, I added this so we don't forget to fix it before merging.
> 
> #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
> 	pr_crit_once("FIXME: Walk the memory attributes of the slot and set the mixed status appropriately");
> #endif

Here is the code to fix it (based on your latest github repo).

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e552374f2357..609ff1cba9c5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2195,4 +2195,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
 
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+void kvm_memory_attributes_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+#endif
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index eda615f3951c..8833d7201e41 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7201,10 +7201,11 @@ static bool has_mixed_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
return false;
 }
 
-void kvm_arch_set_memory_attributes(struct kvm *kvm,
-   struct kvm_memory_slot *slot,
-   unsigned long attrs,
-   gfn_t start, gfn_t end)
+static void kvm_update_lpage_mixed_flag(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   bool set_attrs,
+   unsigned long attrs,
+   gfn_t start, gfn_t end)
 {
unsigned long pages, mask;
gfn_t gfn, gfn_end, first, last;
@@ -7231,25 +7232,53 @@ void kvm_arch_set_memory_attributes(struct kvm *kvm,
first = start & mask;
last = (end - 1) & mask;
 
-   /*
-* We only need to scan the head and tail page, for middle pages
-* we know they will not be mixed.
-*/
+   /* head page */
gfn = max(first, slot->base_gfn);
gfn_end = min(first + pages, slot->base_gfn + slot->npages);
+   if (!set_attrs)
+   attrs = kvm_get_memory_attributes(kvm, gfn);
mixed = has_mixed_attrs(kvm, slot, level, attrs, gfn, gfn_end);
linfo_update_mixed(gfn, slot, level, mixed);
 
if (first == last)
return;
 
-   for (gfn = first + pages; gfn < last; gfn += pages)
-   linfo_update_mixed(gfn, slot, level, false);
+   /* middle pages */
+   for (gfn = first + pages; gfn < last; gfn += pages) {
+   if (set_attrs) {
+   mixed = false;
+   } else {
+   gfn_end = gfn + pages;
+   attrs = kvm_get_memory_attributes(kvm, gfn);
+   mixed = has_mixed_attrs(kvm, slot, level, attrs,
+   gfn, gfn_end);
+   }
+   linfo_update_mixed(gfn, slot, level, mixed);
+   }
 
+   /* tail page */
gfn = last;
gfn_end = min(last + pages, slot->base_gfn + slot->npages);
+   if (!set_attrs)
+   attrs = kvm_get_memory_attributes(kvm, gfn);
mixed = has_mixed_attrs(kvm, slot, level, attrs, gfn, gfn_end);
linfo_update_mixed(gfn, slot, level, mixed);
}
 }
+
+void 
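[The archive truncates this message here.  Based on the renamed helper above
and the declaration added to kvm_host.h, the remainder plausibly defines two
thin wrappers along these lines; an editor's reconstruction, not Chao's
verbatim code:]

	void kvm_arch_set_memory_attributes(struct kvm *kvm,
					    struct kvm_memory_slot *slot,
					    unsigned long attrs,
					    gfn_t start, gfn_t end)
	{
		/* Attributes are being set: middle pages are known-uniform. */
		kvm_update_lpage_mixed_flag(kvm, slot, true, attrs, start, end);
	}

	void kvm_memory_attributes_create_memslot(struct kvm *kvm,
						  struct kvm_memory_slot *slot)
	{
		/*
		 * New slot: walk the existing attributes for every range so
		 * the mixed bits start out correct (attrs is looked up per
		 * range rather than passed in).
		 */
		kvm_update_lpage_mixed_flag(kvm, slot, false, 0, slot->base_gfn,
					    slot->base_gfn + slot->npages);
	}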

Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2023-01-13 Thread Sean Christopherson
On Fri, Dec 02, 2022, Chao Peng wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9a07380f8d3c..5aefcff614d2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12362,6 +12362,8 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
> 	if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
>   linfo[lpages - 1].disallow_lpage = 1;
>   ugfn = slot->userspace_addr >> PAGE_SHIFT;
> + if (kvm_slot_can_be_private(slot))
> + ugfn |= slot->restricted_offset >> PAGE_SHIFT;
>   /*
>* If the gfn and userspace address are not aligned wrt each
>* other, disable large page support for this slot.

Forgot to talk about the bug.  This code needs to handle the scenario where a
memslot is created with existing, non-uniform attributes.  It might be a bit ugly
(I didn't even try to write the code), but it's definitely possible, and since
memslot updates are already slow I think it's best to handle things here.

In the meantime, I added this so we don't forget to fix it before merging.

#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
	pr_crit_once("FIXME: Walk the memory attributes of the slot and set the mixed status appropriately");
#endif
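[Editor's note: for context, the quoted ugfn hunk feeds the pre-existing
alignment check in kvm_alloc_memslot_metadata().  A simplified sketch, not
verbatim kernel code: OR-ing the restrictedmem offset into ugfn means a
misalignment between the gfn and *either* backing mapping disables large
pages for the whole slot.]

	if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
		unsigned long j;

		/*
		 * Some page in the slot would straddle the backings:
		 * forbid large pages at this level entirely.
		 */
		for (j = 0; j < lpages; ++j)
			linfo[j].disallow_lpage = 1;
	}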




Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2023-01-13 Thread Sean Christopherson
On Fri, Dec 02, 2022, Chao Peng wrote:
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 283cbb83d6ae..7772ab37ac89 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -38,6 +38,7 @@
>  #include 
>  
>  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES

No need for this, I think we should just make it mandatory to implement the
arch hook when CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES=y.  If another arch gains
support for mem attributes and doesn't need the hook, then we can simply add a
weak helper (or maybe add a #define then if we feel that's the way to go).
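[Editor's illustration of the weak-helper option Sean mentions; a sketch
only, not code from the series:]

	void __weak kvm_arch_set_memory_attributes(struct kvm *kvm,
						   struct kvm_memory_slot *slot,
						   unsigned long attrs,
						   gfn_t start, gfn_t end)
	{
		/* Default no-op for arches that don't track lpage mixing. */
	}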

>  #define KVM_MAX_VCPUS 1024
>  
> @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
>  #endif
>  };
>  
> +/*
> + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
> + * level. The remaining bits are used as a reference count.
> + */
> +#define KVM_LPAGE_PRIVATE_SHARED_MIXED   (1U << 31)

Similar to the need to unmap, I think we should just say "mixed" and ignore the
private vs. shared, i.e. make this a flag for all memory attributes.

> +#define KVM_LPAGE_COUNT_MAX  ((1U << 31) - 1)

"MAX" is technically correct, but it's more of a mask.  I think we can make it a
moot point though.  There's no need to mask the count, we just want to assert that
adjusting the counting doesn't change the flag.

I would also say throw these defines into mmu.c, at least pending the bug fix
for kvm_alloc_memslot_metadata() (more on that below).

>  struct kvm_lpage_info {
>   int disallow_lpage;
>  };
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e2c70b5afa3e..2190fd8c95c0 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
>  {
>   struct kvm_lpage_info *linfo;
>   int i;
> + int disallow_count;
>  
>   for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
>   linfo = lpage_info_slot(gfn, slot, i);
> +
> + disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
> + WARN_ON(disallow_count + count < 0 ||
> + disallow_count > KVM_LPAGE_COUNT_MAX - count);
> +
>   linfo->disallow_lpage += count;
> - WARN_ON(linfo->disallow_lpage < 0);

It's been a long week so don't trust my math, but I believe this can simply be:

old = linfo->disallow_lpage;
linfo->disallow_lpage += count;

WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG);
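
[Editor's illustration of why the XOR works: the count lives in the low 31
bits, so any adjustment that borrows below zero or carries past bit 30
necessarily flips bit 31, and XOR-ing old vs. new catches exactly that.
Minimal standalone sketch, using the flag name from Sean's snippet:]

	#include <assert.h>

	#define KVM_LPAGE_MIXED_FLAG	(1U << 31)

	int old = 0;		/* count 0, flag clear */
	int cur = old - 1;	/* decrement past zero: borrow reaches bit 31 */

	/* (0 ^ -1) == 0xffffffff, so bit 31 differs and the check trips. */
	assert((old ^ cur) & KVM_LPAGE_MIXED_FLAG);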
>   }
>  }
>  
> @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
>   if (kvm->arch.nx_huge_page_recovery_thread)
>   kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
>  }
> +
> +static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
> +{
> + return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
> +}
> +
> +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
> + int level, bool mixed)
> +{
> + struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
> +
> + if (mixed)
> + linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
> + else
> + linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
> +}
> +
> +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
> +{
> + bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> +
> + if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
> + if (!expect_private)
> + return false;
> + } else if (expect_private)
> + return false;

This is messy.  If we drop the private vs. shared specificity, this can go away if
we add a helper to get attributes

static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
{
	return xa_to_value(xa_load(&kvm->mem_attr_array, gfn));
}

and then we can do


if (KVM_BUG_ON(gfn != xas.xa_index, kvm) ||
attrs != kvm_get_memory_attributes(kvm, gfn)) {
mixed = true;
break;
}

and

if (linfo_is_mixed(lpage_info_slot(gfn, slot, level - 1)) ||
attrs != kvm_get_memory_attributes(kvm, gfn))
return true;


> +
> + return true;
> +}
> +
> +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
> +gfn_t start, gfn_t end)
> +{
> + XA_STATE(xas, &kvm->mem_attr_array, start);
> + gfn_t gfn = start;
> + void *entry;
> + bool mixed = false;
> +
> + rcu_read_lock();
> + entry = xas_load(&xas);
> + while (gfn < end) {
> + if (xas_retry(&xas, entry))
> + continue;
> +
> +  

Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2022-12-08 Thread Chao Peng
On Tue, Dec 06, 2022 at 10:42:24PM -0800, Isaku Yamahata wrote:
> On Tue, Dec 06, 2022 at 08:02:24PM +0800,
> Chao Peng  wrote:
> 
> > On Mon, Dec 05, 2022 at 02:49:59PM -0800, Isaku Yamahata wrote:
> > > On Fri, Dec 02, 2022 at 02:13:45PM +0800,
> > > Chao Peng  wrote:
> > > 
> > > > A large page with mixed private/shared subpages can't be mapped as large
> > > > page since its sub private/shared pages are from different memory
> > > > backends and may also be treated by architecture differently. When
> > > > private/shared memory are mixed in a large page, the current lpage_info
> > > > is not sufficient to decide whether the page can be mapped as large page
> > > > or not and additional private/shared mixed information is needed.
> > > > 
> > > > Tracking this 'mixed' information with the current 'count' like
> > > > disallow_lpage is a bit challenging so reserve a bit in 'disallow_lpage'
> > > > to indicate a large page has mixed private/shared subpages and update
> > > > this 'mixed' bit whenever the memory attribute is changed between
> > > > private and shared.
> > > > 
> > > > Signed-off-by: Chao Peng 
> > > > ---
> > > >  arch/x86/include/asm/kvm_host.h |   8 ++
> > > >  arch/x86/kvm/mmu/mmu.c  | 134 +++-
> > > >  arch/x86/kvm/x86.c  |   2 +
> > > >  include/linux/kvm_host.h|  19 +
> > > >  virt/kvm/kvm_main.c |   9 ++-
> > > >  5 files changed, 169 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > index 283cbb83d6ae..7772ab37ac89 100644
> > > > --- a/arch/x86/include/asm/kvm_host.h
> > > > +++ b/arch/x86/include/asm/kvm_host.h
> > > > @@ -38,6 +38,7 @@
> > > >  #include 
> > > >  
> > > >  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> > > > +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES
> > > >  
> > > >  #define KVM_MAX_VCPUS 1024
> > > >  
> > > > @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
> > > >  #endif
> > > >  };
> > > >  
> > > > +/*
> > > > + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
> > > > + * level. The remaining bits are used as a reference count.
> > > > + */
> > > > +#define KVM_LPAGE_PRIVATE_SHARED_MIXED (1U << 31)
> > > > +#define KVM_LPAGE_COUNT_MAX((1U << 31) - 1)
> > > > +
> > > >  struct kvm_lpage_info {
> > > > int disallow_lpage;
> > > >  };
> > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > index e2c70b5afa3e..2190fd8c95c0 100644
> > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > > @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
> > > >  {
> > > > struct kvm_lpage_info *linfo;
> > > > int i;
> > > > +   int disallow_count;
> > > >  
> > > > for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
> > > > linfo = lpage_info_slot(gfn, slot, i);
> > > > +
> > > > +   disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
> > > > +   WARN_ON(disallow_count + count < 0 ||
> > > > +   disallow_count > KVM_LPAGE_COUNT_MAX - count);
> > > > +
> > > > linfo->disallow_lpage += count;
> > > > -   WARN_ON(linfo->disallow_lpage < 0);
> > > > }
> > > >  }
> > > >  
> > > > @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
> > > > if (kvm->arch.nx_huge_page_recovery_thread)
> > > > kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
> > > >  }
> > > > +
> > > > +static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
> > > > +{
> > > > +   return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > > +}
> > > > +
> > > > +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
> > > > +   int level, bool mixed)
> > > > +{
> > > > +   struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
> > > > +
> > > > +   if (mixed)
> > > > +   linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > > +   else
> > > > +   linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > > +}
> > > > +
> > > > +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
> > > > +{
> > > > +   bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> > > > +
> > > > +   if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
> > > > +   if (!expect_private)
> > > > +   return false;
> > > > +   } else if (expect_private)
> > > > +   return false;
> > > > +
> > > > +   return true;
> > > > +}
> > > > +
> > > > +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
> > > > +  gfn_t start, gfn_t end)
> > > > 

Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2022-12-06 Thread Isaku Yamahata
On Tue, Dec 06, 2022 at 08:02:24PM +0800,
Chao Peng  wrote:

> On Mon, Dec 05, 2022 at 02:49:59PM -0800, Isaku Yamahata wrote:
> > On Fri, Dec 02, 2022 at 02:13:45PM +0800,
> > Chao Peng  wrote:
> > 
> > > A large page with mixed private/shared subpages can't be mapped as large
> > > page since its sub private/shared pages are from different memory
> > > backends and may also be treated by architecture differently. When
> > > private/shared memory are mixed in a large page, the current lpage_info
> > > is not sufficient to decide whether the page can be mapped as large page
> > > or not and additional private/shared mixed information is needed.
> > > 
> > > Tracking this 'mixed' information with the current 'count' like
> > > disallow_lpage is a bit challenging so reserve a bit in 'disallow_lpage'
> > > to indicate a large page has mixed private/shared subpages and update
> > > this 'mixed' bit whenever the memory attribute is changed between
> > > private and shared.
> > > 
> > > Signed-off-by: Chao Peng 
> > > ---
> > >  arch/x86/include/asm/kvm_host.h |   8 ++
> > >  arch/x86/kvm/mmu/mmu.c  | 134 +++-
> > >  arch/x86/kvm/x86.c  |   2 +
> > >  include/linux/kvm_host.h|  19 +
> > >  virt/kvm/kvm_main.c |   9 ++-
> > >  5 files changed, 169 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index 283cbb83d6ae..7772ab37ac89 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -38,6 +38,7 @@
> > >  #include 
> > >  
> > >  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> > > +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES
> > >  
> > >  #define KVM_MAX_VCPUS 1024
> > >  
> > > @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
> > >  #endif
> > >  };
> > >  
> > > +/*
> > > + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
> > > + * level. The remaining bits are used as a reference count.
> > > + */
> > > +#define KVM_LPAGE_PRIVATE_SHARED_MIXED   (1U << 31)
> > > +#define KVM_LPAGE_COUNT_MAX  ((1U << 31) - 1)
> > > +
> > >  struct kvm_lpage_info {
> > >   int disallow_lpage;
> > >  };
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index e2c70b5afa3e..2190fd8c95c0 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
> > >  {
> > >   struct kvm_lpage_info *linfo;
> > >   int i;
> > > + int disallow_count;
> > >  
> > >   for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
> > >   linfo = lpage_info_slot(gfn, slot, i);
> > > +
> > > + disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
> > > + WARN_ON(disallow_count + count < 0 ||
> > > + disallow_count > KVM_LPAGE_COUNT_MAX - count);
> > > +
> > >   linfo->disallow_lpage += count;
> > > - WARN_ON(linfo->disallow_lpage < 0);
> > >   }
> > >  }
> > >  
> > > @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
> > >   if (kvm->arch.nx_huge_page_recovery_thread)
> > >   kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
> > >  }
> > > +
> > > +static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
> > > +{
> > > + return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > +}
> > > +
> > > +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
> > > + int level, bool mixed)
> > > +{
> > > + struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
> > > +
> > > + if (mixed)
> > > + linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > + else
> > > + linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > > +}
> > > +
> > > +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
> > > +{
> > > + bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> > > +
> > > + if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
> > > + if (!expect_private)
> > > + return false;
> > > + } else if (expect_private)
> > > + return false;
> > > +
> > > + return true;
> > > +}
> > > +
> > > +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
> > > +gfn_t start, gfn_t end)
> > > +{
> > > + XA_STATE(xas, &kvm->mem_attr_array, start);
> > > + gfn_t gfn = start;
> > > + void *entry;
> > > + bool mixed = false;
> > > +
> > > + rcu_read_lock();
> > > + entry = xas_load(&xas);
> > > + while (gfn < end) {
> > > + if (xas_retry(&xas, entry))
> > > + continue;
> > > +
> > > + KVM_BUG_ON(gfn != xas.xa_index, kvm);
> > > +
> > > + if (!is_expected_attr_entry(entry, attrs)) {
> > > + mixed = true;
> > > + break;
> > > + }

Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2022-12-06 Thread Chao Peng
On Mon, Dec 05, 2022 at 02:49:59PM -0800, Isaku Yamahata wrote:
> On Fri, Dec 02, 2022 at 02:13:45PM +0800,
> Chao Peng  wrote:
> 
> > A large page with mixed private/shared subpages can't be mapped as large
> > page since its sub private/shared pages are from different memory
> > backends and may also be treated by architecture differently. When
> > private/shared memory are mixed in a large page, the current lpage_info
> > is not sufficient to decide whether the page can be mapped as large page
> > or not and additional private/shared mixed information is needed.
> > 
> > Tracking this 'mixed' information with the current 'count' like
> > disallow_lpage is a bit challenging so reserve a bit in 'disallow_lpage'
> > to indicate a large page has mixed private/shared subpages and update
> > this 'mixed' bit whenever the memory attribute is changed between
> > private and shared.
> > 
> > Signed-off-by: Chao Peng 
> > ---
> >  arch/x86/include/asm/kvm_host.h |   8 ++
> >  arch/x86/kvm/mmu/mmu.c  | 134 +++-
> >  arch/x86/kvm/x86.c  |   2 +
> >  include/linux/kvm_host.h|  19 +
> >  virt/kvm/kvm_main.c |   9 ++-
> >  5 files changed, 169 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 283cbb83d6ae..7772ab37ac89 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -38,6 +38,7 @@
> >  #include 
> >  
> >  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> > +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES
> >  
> >  #define KVM_MAX_VCPUS 1024
> >  
> > @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
> >  #endif
> >  };
> >  
> > +/*
> > + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
> > + * level. The remaining bits are used as a reference count.
> > + */
> > +#define KVM_LPAGE_PRIVATE_SHARED_MIXED (1U << 31)
> > +#define KVM_LPAGE_COUNT_MAX((1U << 31) - 1)
> > +
> >  struct kvm_lpage_info {
> > int disallow_lpage;
> >  };
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index e2c70b5afa3e..2190fd8c95c0 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
> >  {
> > struct kvm_lpage_info *linfo;
> > int i;
> > +   int disallow_count;
> >  
> > for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
> > linfo = lpage_info_slot(gfn, slot, i);
> > +
> > +   disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
> > +   WARN_ON(disallow_count + count < 0 ||
> > +   disallow_count > KVM_LPAGE_COUNT_MAX - count);
> > +
> > linfo->disallow_lpage += count;
> > -   WARN_ON(linfo->disallow_lpage < 0);
> > }
> >  }
> >  
> > @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
> > if (kvm->arch.nx_huge_page_recovery_thread)
> > kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
> >  }
> > +
> > +static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
> > +{
> > +   return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > +}
> > +
> > +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
> > +   int level, bool mixed)
> > +{
> > +   struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
> > +
> > +   if (mixed)
> > +   linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > +   else
> > +   linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
> > +}
> > +
> > +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
> > +{
> > +   bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> > +
> > +   if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
> > +   if (!expect_private)
> > +   return false;
> > +   } else if (expect_private)
> > +   return false;
> > +
> > +   return true;
> > +}
> > +
> > +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
> > +  gfn_t start, gfn_t end)
> > +{
> > +   XA_STATE(xas, &kvm->mem_attr_array, start);
> > +   gfn_t gfn = start;
> > +   void *entry;
> > +   bool mixed = false;
> > +
> > +   rcu_read_lock();
> > +   entry = xas_load(&xas);
> > +   while (gfn < end) {
> > +   if (xas_retry(&xas, entry))
> > +   continue;
> > +
> > +   KVM_BUG_ON(gfn != xas.xa_index, kvm);
> > +
> > +   if (!is_expected_attr_entry(entry, attrs)) {
> > +   mixed = true;
> > +   break;
> > +   }
> > +
> > +   entry = xas_next(&xas);
> > +   gfn++;
> > +   }
> > +
> > +   rcu_read_unlock();
> > +   return mixed;
> > +}
> > +
> > +static bool mem_attrs_mixed(struct kvm *kvm, struct kvm_memory_slot *slot,
> > +   

Re: [PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2022-12-05 Thread Isaku Yamahata
On Fri, Dec 02, 2022 at 02:13:45PM +0800,
Chao Peng  wrote:

> A large page with mixed private/shared subpages can't be mapped as large
> page since its sub private/shared pages are from different memory
> backends and may also be treated by architecture differently. When
> private/shared memory are mixed in a large page, the current lpage_info
> is not sufficient to decide whether the page can be mapped as large page
> or not and additional private/shared mixed information is needed.
> 
> Tracking this 'mixed' information with the current 'count' like
> disallow_lpage is a bit challenging so reserve a bit in 'disallow_lpage'
> to indicate a large page has mixed private/shared subpages and update
> this 'mixed' bit whenever the memory attribute is changed between
> private and shared.
> 
> Signed-off-by: Chao Peng 
> ---
>  arch/x86/include/asm/kvm_host.h |   8 ++
>  arch/x86/kvm/mmu/mmu.c  | 134 +++-
>  arch/x86/kvm/x86.c  |   2 +
>  include/linux/kvm_host.h|  19 +
>  virt/kvm/kvm_main.c |   9 ++-
>  5 files changed, 169 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 283cbb83d6ae..7772ab37ac89 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -38,6 +38,7 @@
>  #include 
>  
>  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES
>  
>  #define KVM_MAX_VCPUS 1024
>  
> @@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
>  #endif
>  };
>  
> +/*
> + * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
> + * level. The remaining bits are used as a reference count.
> + */
> +#define KVM_LPAGE_PRIVATE_SHARED_MIXED   (1U << 31)
> +#define KVM_LPAGE_COUNT_MAX  ((1U << 31) - 1)
> +
>  struct kvm_lpage_info {
>   int disallow_lpage;
>  };
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e2c70b5afa3e..2190fd8c95c0 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
>  {
>   struct kvm_lpage_info *linfo;
>   int i;
> + int disallow_count;
>  
>   for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
>   linfo = lpage_info_slot(gfn, slot, i);
> +
> + disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
> + WARN_ON(disallow_count + count < 0 ||
> + disallow_count > KVM_LPAGE_COUNT_MAX - count);
> +
>   linfo->disallow_lpage += count;
> - WARN_ON(linfo->disallow_lpage < 0);
>   }
>  }
>  
> @@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
>   if (kvm->arch.nx_huge_page_recovery_thread)
>   kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
>  }
> +
> +static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
> +{
> + return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
> +}
> +
> +static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
> + int level, bool mixed)
> +{
> + struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
> +
> + if (mixed)
> + linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
> + else
> + linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
> +}
> +
> +static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
> +{
> + bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> +
> + if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
> + if (!expect_private)
> + return false;
> + } else if (expect_private)
> + return false;
> +
> + return true;
> +}
> +
> +static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
> +gfn_t start, gfn_t end)
> +{
> + XA_STATE(xas, &kvm->mem_attr_array, start);
> + gfn_t gfn = start;
> + void *entry;
> + bool mixed = false;
> +
> + rcu_read_lock();
> + entry = xas_load(&xas);
> + while (gfn < end) {
> + if (xas_retry(&xas, entry))
> + continue;
> +
> + KVM_BUG_ON(gfn != xas.xa_index, kvm);
> +
> + if (!is_expected_attr_entry(entry, attrs)) {
> + mixed = true;
> + break;
> + }
> +
> + entry = xas_next(&xas);
> + gfn++;
> + }
> +
> + rcu_read_unlock();
> + return mixed;
> +}
> +
> +static bool mem_attrs_mixed(struct kvm *kvm, struct kvm_memory_slot *slot,
> + int level, unsigned long attrs,
> + gfn_t start, gfn_t end)
> +{
> + unsigned long gfn;
> +
> + if (level == PG_LEVEL_2M)
> + return mem_attrs_mixed_2m(kvm, attrs, start, end);
> +
> + for (gfn = 

[PATCH v10 7/9] KVM: Update lpage info when private/shared memory are mixed

2022-12-01 Thread Chao Peng
A large page with mixed private/shared subpages can't be mapped as large
page since its sub private/shared pages are from different memory
backends and may also be treated by architecture differently. When
private/shared memory are mixed in a large page, the current lpage_info
is not sufficient to decide whether the page can be mapped as large page
or not and additional private/shared mixed information is needed.

Tracking this 'mixed' information with the current 'count' like
disallow_lpage is a bit challenging so reserve a bit in 'disallow_lpage'
to indicate a large page has mixed private/shared subpages and update
this 'mixed' bit whenever the memory attribute is changed between
private and shared.

Signed-off-by: Chao Peng 
---
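[Editor's illustration, using the names the patch below defines:
disallow_lpage packs a 'mixed' flag and the existing refcount into one int.
A sketch, not part of the patch:]

	int disallow_lpage = KVM_LPAGE_PRIVATE_SHARED_MIXED | 2;

	/* bit 31: private/shared mixed at this level -> set here */
	int mixed = disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
	/* bits 30:0: reference count of disallow reasons -> 2 here */
	int count = disallow_lpage & KVM_LPAGE_COUNT_MAX;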
 arch/x86/include/asm/kvm_host.h |   8 ++
 arch/x86/kvm/mmu/mmu.c  | 134 +++-
 arch/x86/kvm/x86.c  |   2 +
 include/linux/kvm_host.h|  19 +
 virt/kvm/kvm_main.c |   9 ++-
 5 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 283cbb83d6ae..7772ab37ac89 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -38,6 +38,7 @@
 #include 
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ARCH_SET_MEMORY_ATTRIBUTES
 
 #define KVM_MAX_VCPUS 1024
 
@@ -1011,6 +1012,13 @@ struct kvm_vcpu_arch {
 #endif
 };
 
+/*
+ * Use a bit in disallow_lpage to indicate private/shared pages mixed at the
+ * level. The remaining bits are used as a reference count.
+ */
+#define KVM_LPAGE_PRIVATE_SHARED_MIXED (1U << 31)
+#define KVM_LPAGE_COUNT_MAX((1U << 31) - 1)
+
 struct kvm_lpage_info {
int disallow_lpage;
 };
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e2c70b5afa3e..2190fd8c95c0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -763,11 +763,16 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
 {
struct kvm_lpage_info *linfo;
int i;
+   int disallow_count;
 
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
+
+   disallow_count = linfo->disallow_lpage & KVM_LPAGE_COUNT_MAX;
+   WARN_ON(disallow_count + count < 0 ||
+   disallow_count > KVM_LPAGE_COUNT_MAX - count);
+
linfo->disallow_lpage += count;
-   WARN_ON(linfo->disallow_lpage < 0);
}
 }
 
@@ -6986,3 +6991,130 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
if (kvm->arch.nx_huge_page_recovery_thread)
kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
 }
+
+static bool linfo_is_mixed(struct kvm_lpage_info *linfo)
+{
+   return linfo->disallow_lpage & KVM_LPAGE_PRIVATE_SHARED_MIXED;
+}
+
+static void linfo_set_mixed(gfn_t gfn, struct kvm_memory_slot *slot,
+   int level, bool mixed)
+{
+   struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);
+
+   if (mixed)
+   linfo->disallow_lpage |= KVM_LPAGE_PRIVATE_SHARED_MIXED;
+   else
+   linfo->disallow_lpage &= ~KVM_LPAGE_PRIVATE_SHARED_MIXED;
+}
+
+static bool is_expected_attr_entry(void *entry, unsigned long expected_attrs)
+{
+   bool expect_private = expected_attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
+
+   if (xa_to_value(entry) & KVM_MEMORY_ATTRIBUTE_PRIVATE) {
+   if (!expect_private)
+   return false;
+   } else if (expect_private)
+   return false;
+
+   return true;
+}
+
+static bool mem_attrs_mixed_2m(struct kvm *kvm, unsigned long attrs,
+  gfn_t start, gfn_t end)
+{
+   XA_STATE(xas, &kvm->mem_attr_array, start);
+   gfn_t gfn = start;
+   void *entry;
+   bool mixed = false;
+
+   rcu_read_lock();
+   entry = xas_load(&xas);
+   while (gfn < end) {
+   if (xas_retry(&xas, entry))
+   continue;
+
+   KVM_BUG_ON(gfn != xas.xa_index, kvm);
+
+   if (!is_expected_attr_entry(entry, attrs)) {
+   mixed = true;
+   break;
+   }
+
+   entry = xas_next(&xas);
+   gfn++;
+   }
+
+   rcu_read_unlock();
+   return mixed;
+}
+
+static bool mem_attrs_mixed(struct kvm *kvm, struct kvm_memory_slot *slot,
+   int level, unsigned long attrs,
+   gfn_t start, gfn_t end)
+{
+   unsigned long gfn;
+
+   if (level == PG_LEVEL_2M)
+   return mem_attrs_mixed_2m(kvm, attrs, start, end);
+
+   for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1))
+   if (linfo_is_mixed(lpage_info_slot(gfn, slot, level - 1)) ||
+   !is_expected_attr_entry(xa_load(&kvm->mem_attr_array, gfn),
+
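[The archive truncates the final message here.  Judging from the same
condition Sean quotes back in his review above, the loop plausibly completes
along these lines; an editor's reconstruction, not the verbatim patch:]

+					   attrs))
+			return true;
+
+	return false;
+}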