Re: [PATCH 05/10] KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk()

2015-11-17 Thread Paolo Bonzini


On 16/11/2015 03:51, Takuya Yoshikawa wrote:
> What kvm_mmu_mark_parents_unsync() does is:
> 
>   for each p_i in sp->parent_ptes rmap chain
> mark_unsync(p_i);
> 
> Then, mark_unsync() finds the parent sp including that p_i to
> set ->unsync_child_bitmap and increment ->unsync_children if
> necessary.  It may also call kvm_mmu_mark_parents_unsync()
> recursively.

I agree.  sp->parent_ptes goes up one level from sp;
kvm_mmu_mark_parents_unsync(sp) visits the level above sp, while
mark_unsync(sp) visit sp and all the levels above it.

Calling mark_unsync(parent_pte) is enough to complete the visit, after
kvm_mmu_mark_parents_unsync has already processed the level above sp.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/10] KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk()

2015-11-17 Thread Xiao Guangrong



On 11/12/2015 07:52 PM, Takuya Yoshikawa wrote:

kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does
nearly the same as the for_each_rmap_spte macro.  The only difference
is that is_shadow_present_pte() checks cannot be placed there because
kvm_mmu_mark_parents_unsync() can be called with a new parent pointer
whose entry is not set yet.

By calling mark_unsync() separately for the parent and adding the parent
pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro
works with no problem.

Signed-off-by: Takuya Yoshikawa 
---
  arch/x86/kvm/mmu.c | 36 +---
  1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e8cfdc4..1691171 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long 
*pte_list)
}
  }

-typedef void (*pte_list_walk_fn) (u64 *spte);
-static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
-{
-   struct pte_list_desc *desc;
-   int i;
-
-   if (!*pte_list)
-   return;
-
-   if (!(*pte_list & 1))
-   return fn((u64 *)*pte_list);
-
-   desc = (struct pte_list_desc *)(*pte_list & ~1ul);
-   while (desc) {
-   for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
-   fn(desc->sptes[i]);
-   desc = desc->more;
-   }
-}
-
  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
  {
@@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
kvm_vcpu *vcpu,
  static void mark_unsync(u64 *spte);
  static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
  {
-   pte_list_walk(>parent_ptes, mark_unsync);
+   u64 *sptep;
+   struct rmap_iterator iter;
+
+   for_each_rmap_spte(>parent_ptes, , sptep) {
+   mark_unsync(sptep);
+   }
  }

  static void mark_unsync(u64 *spte)
@@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
break;

-   mmu_page_add_parent_pte(vcpu, sp, parent_pte);
if (sp->unsync_children) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
kvm_mmu_mark_parents_unsync(sp);


After your change above, the @sp's parents have not been changed, no need to 
call it now.


-   } else if (sp->unsync)
+   if (parent_pte)
+   mark_unsync(parent_pte);
+   } else if (sp->unsync) {
kvm_mmu_mark_parents_unsync(sp);


Ditto.


+   if (parent_pte)
+   mark_unsync(parent_pte);
+   }
+   mmu_page_add_parent_pte(vcpu, sp, parent_pte);

__clear_sp_write_flooding_count(sp);
trace_kvm_mmu_get_page(sp, false);


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/10] KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk()

2015-11-15 Thread Marcelo Tosatti
On Fri, Nov 13, 2015 at 07:47:28PM -0200, Marcelo Tosatti wrote:
> On Thu, Nov 12, 2015 at 08:52:45PM +0900, Takuya Yoshikawa wrote:
> > kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does
> > nearly the same as the for_each_rmap_spte macro.  The only difference
> > is that is_shadow_present_pte() checks cannot be placed there because
> > kvm_mmu_mark_parents_unsync() can be called with a new parent pointer
> > whose entry is not set yet.
> > 
> > By calling mark_unsync() separately for the parent and adding the parent
> > pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro
> > works with no problem.
> > 
> > Signed-off-by: Takuya Yoshikawa 
> > ---
> >  arch/x86/kvm/mmu.c | 36 +---
> >  1 file changed, 13 insertions(+), 23 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index e8cfdc4..1691171 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long 
> > *pte_list)
> > }
> >  }
> >  
> > -typedef void (*pte_list_walk_fn) (u64 *spte);
> > -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
> > -{
> > -   struct pte_list_desc *desc;
> > -   int i;
> > -
> > -   if (!*pte_list)
> > -   return;
> > -
> > -   if (!(*pte_list & 1))
> > -   return fn((u64 *)*pte_list);
> > -
> > -   desc = (struct pte_list_desc *)(*pte_list & ~1ul);
> > -   while (desc) {
> > -   for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
> > -   fn(desc->sptes[i]);
> > -   desc = desc->more;
> > -   }
> > -}
> > -
> >  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
> > struct kvm_memory_slot *slot)
> >  {
> > @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page 
> > *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
> >  static void mark_unsync(u64 *spte);
> >  static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
> >  {
> > -   pte_list_walk(>parent_ptes, mark_unsync);
> > +   u64 *sptep;
> > +   struct rmap_iterator iter;
> > +
> > +   for_each_rmap_spte(>parent_ptes, , sptep) {
> > +   mark_unsync(sptep);
> > +   }
> >  }
> >  
> >  static void mark_unsync(u64 *spte)
> > @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
> > kvm_vcpu *vcpu,
> 
> Faulting a spte, and one of the levels of sptes, either
> 
> 
>   spte-1
>   spte-2
>   spte-3
> 
> has present bit clear. So we're searching for a guest page to shadow, with
> gfn "gfn".
> 
> > if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
> > break;
> 
> If a shadow for gfn exists, but is unsync, sync guest-page ---to--> kvm
> sptes.
> 
> > -   mmu_page_add_parent_pte(vcpu, sp, parent_pte);
> 
> add "gfn" (actually its "struct kvm_mmu_page *sp" pointer) to
> the parent.
> > if (sp->unsync_children) {
> > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
> > kvm_mmu_mark_parents_unsync(sp);
> 
> kvm_mmu_mark_parents_unsync relied on the links from current level all
> the way to top level to mark all levels unsync, so that on guest entry,
> KVM_REQ_MMU_SYNC is processed and any level is brought from guest -->
> kvm pages. This now fails, because you removed "mmu_page_add_parent_pte"
> (the link is not formed all the way to root).
> 
> Unless i am missing something, this is not correct.

The actual issue is this: a higher level page that had, under its children,
no out of sync pages, now, due to your addition, a child that is unsync:

initial state:
level1 

final state:

level1 -x-> level2 -x-> level3

Where -x-> are the links created by this pagefault fixing round.

If _any_ page under you is unsync (not necessarily the ones this
pagefault is accessing), you have to mark parents unsync.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/10] KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk()

2015-11-15 Thread Takuya Yoshikawa

On 2015/11/14 18:20, Marcelo Tosatti wrote:


The actual issue is this: a higher level page that had, under its children,
no out of sync pages, now, due to your addition, a child that is unsync:

initial state:
level1

final state:

level1 -x-> level2 -x-> level3

Where -x-> are the links created by this pagefault fixing round.

If _any_ page under you is unsync (not necessarily the ones this
pagefault is accessing), you have to mark parents unsync.


I understand this, but I don't think my patch will break this.

What kvm_mmu_mark_parents_unsync() does is:

  for each p_i in sp->parent_ptes rmap chain
mark_unsync(p_i);

Then, mark_unsync() finds the parent sp including that p_i to
set ->unsync_child_bitmap and increment ->unsync_children if
necessary.  It may also call kvm_mmu_mark_parents_unsync()
recursively.

I understand we need to tell the parents "you have an unsync
child/descendant" until this information reaches the top level
by that recursive calls.

But since these recursive calls cannot come back to the starting sp,
the child->parent graph has no loop, each mark_unsync(p_i) will not
be affected by other parents in that sp->parent_ptes rmap chain,
from which we started the recursive calls.


As the following code shows, my patch does mark_unsync(parent_pte)
separately, and then mmu_page_add_parent_pte(vcpu, sp, parent_pte):


-   } else if (sp->unsync)
+   if (parent_pte)
+   mark_unsync(parent_pte);
+   } else if (sp->unsync) {
kvm_mmu_mark_parents_unsync(sp);
+   if (parent_pte)
+   mark_unsync(parent_pte);
+   }
+   mmu_page_add_parent_pte(vcpu, sp, parent_pte);


So, as you worried, during each mark_unsync(p_i) is processed,
this parent_pte does not exist in that sp->parent_ptes rmap chain.

But as I explained above, this does not change anything about what
each mark_unsync(p_i) call does, so keeps the original behaviour.


By the way, I think "kvm_mmu_mark_parents_unsync" and "mark_unsync"
do not tell what they actually do well. When I first saw the names,
I thought they would just set the parents' sp->unsync.

To reflect the following meaning better, it should be
propagate_unsync(_to_parents) or something:

  Tell the parents "you have an unsync child/descendant"
  until this unsync information reaches the top level


Thanks,
  Takuya


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/10] KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk()

2015-11-13 Thread Marcelo Tosatti
On Thu, Nov 12, 2015 at 08:52:45PM +0900, Takuya Yoshikawa wrote:
> kvm_mmu_mark_parents_unsync() alone uses pte_list_walk(), witch does
> nearly the same as the for_each_rmap_spte macro.  The only difference
> is that is_shadow_present_pte() checks cannot be placed there because
> kvm_mmu_mark_parents_unsync() can be called with a new parent pointer
> whose entry is not set yet.
> 
> By calling mark_unsync() separately for the parent and adding the parent
> pointer to the parent_ptes chain later in kvm_mmu_get_page(), the macro
> works with no problem.
> 
> Signed-off-by: Takuya Yoshikawa 
> ---
>  arch/x86/kvm/mmu.c | 36 +---
>  1 file changed, 13 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index e8cfdc4..1691171 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, unsigned long 
> *pte_list)
>   }
>  }
>  
> -typedef void (*pte_list_walk_fn) (u64 *spte);
> -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
> -{
> - struct pte_list_desc *desc;
> - int i;
> -
> - if (!*pte_list)
> - return;
> -
> - if (!(*pte_list & 1))
> - return fn((u64 *)*pte_list);
> -
> - desc = (struct pte_list_desc *)(*pte_list & ~1ul);
> - while (desc) {
> - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
> - fn(desc->sptes[i]);
> - desc = desc->more;
> - }
> -}
> -
>  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
>   struct kvm_memory_slot *slot)
>  {
> @@ -1741,7 +1721,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
> kvm_vcpu *vcpu,
>  static void mark_unsync(u64 *spte);
>  static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
>  {
> - pte_list_walk(>parent_ptes, mark_unsync);
> + u64 *sptep;
> + struct rmap_iterator iter;
> +
> + for_each_rmap_spte(>parent_ptes, , sptep) {
> + mark_unsync(sptep);
> + }
>  }
>  
>  static void mark_unsync(u64 *spte)
> @@ -2111,12 +2096,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
> kvm_vcpu *vcpu,

Faulting a spte, and one of the levels of sptes, either


spte-1
spte-2
spte-3

has present bit clear. So we're searching for a guest page to shadow, with
gfn "gfn".

>   if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
>   break;

If a shadow for gfn exists, but is unsync, sync guest-page ---to--> kvm
sptes.

> - mmu_page_add_parent_pte(vcpu, sp, parent_pte);

add "gfn" (actually its "struct kvm_mmu_page *sp" pointer) to
the parent.
>   if (sp->unsync_children) {
>   kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
>   kvm_mmu_mark_parents_unsync(sp);

kvm_mmu_mark_parents_unsync relied on the links from current level all
the way to top level to mark all levels unsync, so that on guest entry,
KVM_REQ_MMU_SYNC is processed and any level is brought from guest -->
kvm pages. This now fails, because you removed "mmu_page_add_parent_pte"
(the link is not formed all the way to root).

Unless i am missing something, this is not correct.

> - } else if (sp->unsync)
> + if (parent_pte)
> + mark_unsync(parent_pte);
> + } else if (sp->unsync) {
>   kvm_mmu_mark_parents_unsync(sp);
> + if (parent_pte)
> + mark_unsync(parent_pte);
> + }
> + mmu_page_add_parent_pte(vcpu, sp, parent_pte);
>  
>   __clear_sp_write_flooding_count(sp);
>   trace_kvm_mmu_get_page(sp, false);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html