Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-29 Thread Xiao Guangrong
On 05/29/2013 08:25 PM, Marcelo Tosatti wrote:
> On Tue, May 28, 2013 at 10:51:38PM +0800, Xiao Guangrong wrote:
>> On 05/28/2013 08:13 AM, Marcelo Tosatti wrote:
>>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap obsolete pages. Since an obsolete page will not
 be used again, we need not spend time finding its unsync children.
 Also, we delete the page from the shadow page cache so that the page
 is completely isolated after calling this function.

 A later patch will use it to collapse TLB flushes.

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/kvm/mmu.c |   46 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
list_del(&sp->link);
free_page((unsigned long)sp->spt);
if (!sp->role.direct)
 @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
return zapped;
  }
  
 -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page 
 *sp,
 -  struct list_head *invalid_list)
 +static int
 +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 + bool zap_unsync_children,
 + struct list_head *invalid_list)
  {
 -  int ret;
 +  int ret = 0;
  
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
 -  ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 +
 +  if (likely(zap_unsync_children))
 +  ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 +
>>>
>>> Why is this an important case to be optimized?
>>>
>>> 1) shadow is the uncommon, obsolete case.
>>> 2) mmu_zap_unsync_children has
>>>
>>> if (parent->role.level == PT_PAGE_TABLE_LEVEL)
>>> return 0;
>>>
>>> So the large majority of pages are already optimized.
>>
>> Hmm, if we zap a high-level page (e.g. level = 4), we have to walk its
>> children and its children's children, which is a lot of overhead.
>> (IMHO, even a trivial optimization is still worthwhile, especially since
>> the change is really slight.)
>>
>> And there is another point I mentioned in the changelog:
>> "Also, we delete the page from the shadow page cache so that the page
>> is completely isolated after calling this function."
>> Skipping the zap of unsync children ensures that only one page is
>> zapped, so we can use "hlist_del_init(&sp->hash_link)" to completely
>> remove the page from the mmu cache.
>>
>> Now, Gleb and I have agreed that skipping obsolete pages when
>> walking the hash list is a better way.
>>
>> BTW, zapping unsync children is unnecessary, isn't it?
> 
> It is necessary that, if an unsync page exists, invlpg emulation is
> able to reach it, or that it is synchronized at kvm_mmu_get_page time.

Hmmm? It is not always better.

If an unsync page is zapped, the mmu will map a newly allocated page
whose entries are all non-present. That can cause more #PFs than
syncing the page would.
This is especially true for the invlpg case: there you zap a page that
may still be mapped in another vcpu's page tables and currently in use.

Also, it does possibly-unneeded work up front - it spends time walking
and zapping all the unsync children even though they may never be used
at all. So delaying this work until they are actually used is better.

> 
> You transfer the synchronization work to pagefault time, which directly
> affects guest performance, while it could have been done by the host
> (this was the reason for zapping unsync children).

It seems not; in most cases zap_page is done in vcpu context, not by the host.




Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-29 Thread Marcelo Tosatti
On Tue, May 28, 2013 at 10:51:38PM +0800, Xiao Guangrong wrote:
> On 05/28/2013 08:13 AM, Marcelo Tosatti wrote:
> > On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> >> It is only used to zap obsolete pages. Since an obsolete page will
> >> not be used again, we need not spend time finding its unsync children.
> >> Also, we delete the page from the shadow page cache so that the page
> >> is completely isolated after calling this function.
> >>
> >> A later patch will use it to collapse TLB flushes.
> >>
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/kvm/mmu.c |   46 +-
> >>  1 files changed, 41 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> >> index 9b57faa..e676356 100644
> >> --- a/arch/x86/kvm/mmu.c
> >> +++ b/arch/x86/kvm/mmu.c
> >> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
> >> *kvm, int nr)
> >>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
> >>  {
> >>ASSERT(is_empty_shadow_page(sp->spt));
> >> -hlist_del(&sp->hash_link);
> >> +hlist_del_init(&sp->hash_link);
> >>list_del(&sp->link);
> >>free_page((unsigned long)sp->spt);
> >>if (!sp->role.direct)
> >> @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
> >>return zapped;
> >>  }
> >>  
> >> -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page 
> >> *sp,
> >> -  struct list_head *invalid_list)
> >> +static int
> >> +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> >> + bool zap_unsync_children,
> >> + struct list_head *invalid_list)
> >>  {
> >> -  int ret;
> >> +  int ret = 0;
> >>  
> >>trace_kvm_mmu_prepare_zap_page(sp);
> >>++kvm->stat.mmu_shadow_zapped;
> >> -  ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> >> +
> >> +  if (likely(zap_unsync_children))
> >> +  ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> >> +
> > 
> > Why is this an important case to be optimized?
> > 
> > 1) shadow is the uncommon, obsolete case.
> > 2) mmu_zap_unsync_children has
> > 
> > if (parent->role.level == PT_PAGE_TABLE_LEVEL)
> > return 0;
> > 
> > So the large majority of pages are already optimized.
> 
> Hmm, if we zap a high-level page (e.g. level = 4), we have to walk its
> children and its children's children, which is a lot of overhead.
> (IMHO, even a trivial optimization is still worthwhile, especially since
> the change is really slight.)
> 
> And there is another point I mentioned in the changelog:
> "Also, we delete the page from the shadow page cache so that the page
> is completely isolated after calling this function."
> Skipping the zap of unsync children ensures that only one page is
> zapped, so we can use "hlist_del_init(&sp->hash_link)" to completely
> remove the page from the mmu cache.
> 
> Now, Gleb and I have agreed that skipping obsolete pages when
> walking the hash list is a better way.
> 
> BTW, zapping unsync children is unnecessary, isn't it?

It is necessary that, if an unsync page exists, invlpg emulation is
able to reach it, or that it is synchronized at kvm_mmu_get_page time.

You transfer the synchronization work to pagefault time, which directly
affects guest performance, while it could have been done by the host
(this was the reason for zapping unsync children).



Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-28 Thread Xiao Guangrong
On 05/28/2013 08:13 AM, Marcelo Tosatti wrote:
> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>> It is only used to zap obsolete pages. Since an obsolete page will
>> not be used again, we need not spend time finding its unsync children.
>> Also, we delete the page from the shadow page cache so that the page
>> is completely isolated after calling this function.
>>
>> A later patch will use it to collapse TLB flushes.
>>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/kvm/mmu.c |   46 +-
>>  1 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9b57faa..e676356 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
>> *kvm, int nr)
>>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>>  {
>>  ASSERT(is_empty_shadow_page(sp->spt));
>> -hlist_del(&sp->hash_link);
>> +hlist_del_init(&sp->hash_link);
>>  list_del(&sp->link);
>>  free_page((unsigned long)sp->spt);
>>  if (!sp->role.direct)
>> @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  return zapped;
>>  }
>>  
>> -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page 
>> *sp,
>> -struct list_head *invalid_list)
>> +static int
>> +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>> +   bool zap_unsync_children,
>> +   struct list_head *invalid_list)
>>  {
>> -int ret;
>> +int ret = 0;
>>  
>>  trace_kvm_mmu_prepare_zap_page(sp);
>>  ++kvm->stat.mmu_shadow_zapped;
>> -ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
>> +
>> +if (likely(zap_unsync_children))
>> +ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
>> +
> 
> Why is this an important case to be optimized?
> 
> 1) shadow is the uncommon, obsolete case.
> 2) mmu_zap_unsync_children has
> 
> if (parent->role.level == PT_PAGE_TABLE_LEVEL)
> return 0;
> 
> So the large majority of pages are already optimized.

Hmm, if we zap a high-level page (e.g. level = 4), we have to walk its
children and its children's children, which is a lot of overhead.
(IMHO, even a trivial optimization is still worthwhile, especially since
the change is really slight.)

And there is another point I mentioned in the changelog:
"Also, we delete the page from the shadow page cache so that the page
is completely isolated after calling this function."
Skipping the zap of unsync children ensures that only one page is
zapped, so we can use "hlist_del_init(&sp->hash_link)" to completely
remove the page from the mmu cache.

Now, Gleb and I have agreed that skipping obsolete pages when
walking the hash list is a better way.

BTW, zapping unsync children is unnecessary, isn't it?
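To make the shape of the change concrete, the split under discussion looks
roughly like this (a sketch pieced together from the quoted hunk and the
changelog, not the literal patch; exactly where the hash unlink sits is an
assumption):

static int __kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
				      bool zap_unsync_children,
				      struct list_head *invalid_list)
{
	int ret = 0;

	trace_kvm_mmu_prepare_zap_page(sp);
	++kvm->stat.mmu_shadow_zapped;

	if (likely(zap_unsync_children))
		ret = mmu_zap_unsync_children(kvm, sp, invalid_list);

	/* ... the rest of the original kvm_mmu_prepare_zap_page() body ... */
	return ret;
}

/* The normal path keeps today's behaviour: walk and zap unsync children. */
static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
				    struct list_head *invalid_list)
{
	return __kvm_mmu_prepare_zap_page(kvm, sp, true, invalid_list);
}

/*
 * Obsolete pages will never be used again: skip the unsync-children walk
 * and drop the page from the hash so it is completely isolated.  Because
 * the page is unhashed here, kvm_mmu_free_page() must use hlist_del_init(),
 * which turns the second unlink into a harmless no-op.
 */
static int kvm_mmu_prepare_zap_obsolete_page(struct kvm *kvm,
					     struct kvm_mmu_page *sp,
					     struct list_head *invalid_list)
{
	int ret = __kvm_mmu_prepare_zap_page(kvm, sp, false, invalid_list);

	hlist_del_init(&sp->hash_link);
	return ret;
}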




Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-28 Thread Marcelo Tosatti
On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> It is only used to zap obsolete pages. Since an obsolete page will
> not be used again, we need not spend time finding its unsync children.
> Also, we delete the page from the shadow page cache so that the page
> is completely isolated after calling this function.
> 
> A later patch will use it to collapse TLB flushes.
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/kvm/mmu.c |   46 +-
>  1 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9b57faa..e676356 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
> *kvm, int nr)
>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  {
>   ASSERT(is_empty_shadow_page(sp->spt));
> - hlist_del(&sp->hash_link);
> + hlist_del_init(&sp->hash_link);
>   list_del(&sp->link);
>   free_page((unsigned long)sp->spt);
>   if (!sp->role.direct)
> @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>   return zapped;
>  }
>  
> -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> - struct list_head *invalid_list)
> +static int
> +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> +bool zap_unsync_children,
> +struct list_head *invalid_list)
>  {
> - int ret;
> + int ret = 0;
>  
>   trace_kvm_mmu_prepare_zap_page(sp);
>   ++kvm->stat.mmu_shadow_zapped;
> - ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> +
> + if (likely(zap_unsync_children))
> + ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> +

Why is this an important case to be optimized?

1) shadow is the uncommon, obsolete case.
2) mmu_zap_unsync_children has

if (parent->role.level == PT_PAGE_TABLE_LEVEL)
return 0;

So the large majority of pages are already optimized.
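For reference, the walk being referred to looked roughly like this in the mmu
code of that era (an abridged reconstruction, not quoted from the patch;
details may differ). A last-level page returns immediately, which is the point
above; a level-4 page walks every reachable unsync child, which is the
overhead Xiao is worried about:

static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *parent,
				   struct list_head *invalid_list)
{
	int i, zapped = 0;
	struct mmu_page_path parents;
	struct kvm_mmu_pages pages;

	/* Last-level shadow pages have no children: nothing to walk. */
	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
		return 0;

	/* Gather every unsync shadow page reachable below this one and
	 * prepare-zap each of them, counting how many were zapped. */
	kvm_mmu_pages_init(parent, &parents, &pages);
	while (mmu_unsync_walk(parent, &pages)) {
		struct kvm_mmu_page *sp;

		for_each_sp(pages, sp, parents, i) {
			kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
			mmu_pages_clear_parents(&parents);
			zapped++;
		}
		kvm_mmu_pages_init(parent, &parents, &pages);
	}

	return zapped;
}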



Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/24/2013 01:39 PM, Xiao Guangrong wrote:

>>> if (kvm_mmu_prepare_zap_page(sp, list))
>>> hlist_del(&sp->hlist);
>>>
>>> Or, i missed your suggestion?
>> My assumption is that we can leave obsolete shadow pages on hashtable
>> till commit_zap time.
> 
> Ah, i see.
> 
> Yes, I agree with your idea. I think we can only skip obsolete-and-invalid
> pages, since an obsolete-but-not-yet-zapped page still affects the mmu's
> behaviour: for example, it can keep a page write-protected, so
> kvm_mmu_unprotect_page() would not work if it skipped unzapped obsolete pages.

Actually, kvm_mmu_unprotect_page() can work: we can skip obsolete pages
when detecting whether a page needs to be write-protected as well, and it
is easier to make the page writable when zapping obsolete pages.

Will update it following your idea; sorry for the noise.
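A minimal sketch of the hash-walk skip being agreed on here (is_obsolete_sp()
and the mmu_valid_gen generation counter come from other patches in this
series; the exact helper names and placement are illustrative, not the final
code):

static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}

int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_mmu_page *sp;
	LIST_HEAD(invalid_list);
	int r = 0;

	spin_lock(&kvm->mmu_lock);
	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
		/* Obsolete pages are about to be zapped anyway: ignore them. */
		if (is_obsolete_sp(kvm, sp))
			continue;

		r = 1;
		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
	}
	kvm_mmu_commit_zap_page(kvm, &invalid_list);
	spin_unlock(&kvm->mmu_lock);

	return r;
}

As noted above, the same skip has to be applied on the path that decides
whether a gfn needs write protection, otherwise an obsolete-but-unzapped page
could keep a gfn write-protected with no way to unprotect it.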



Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 11:57 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 09:03:50PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 08:39 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 04:09 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 03:37 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap obsolete pages. Since an obsolete page will not
 be used again, we need not spend time finding its unsync children.
 Also, we delete the page from the shadow page cache so that the page
 is completely isolated after calling this function.

 A later patch will use it to collapse TLB flushes.

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void 
 kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
>>> Why do you need hlist_del_init() here? Why not move it into
>>
>> Since the hlist will be double freed. We will it like this:
>>
>> kvm_mmu_prepare_zap_obsolete_page(page, list);
>> kvm_mmu_commit_zap_page(list);
>>kvm_mmu_free_page(page);
>>
>> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which 
>> have
>> deleted the hash list.
>>
>>> kvm_mmu_prepare_zap_page() like we discussed it here:
>>> https://patchwork.kernel.org/patch/2580351/ instead of doing
>>> it differently for obsolete and non obsolete pages?
>>
>> It is can break the hash-list walking: we should rescan the
>> hash list once the page is prepared-ly zapped.
>>
>> I mentioned it in the changelog:
>>
>>   4): drop the patch which deleted page from hash list at the 
>> "prepare"
>>   time since it can break the walk based on hash list.
> Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
 list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
 should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);

>>> Hmm, yes. So lets leave it as is and always commit invalid_list before
>>
>> So, you mean drop this patch and the patch of
>> KVM: MMU: collapse TLB flushes when zap all pages?
>>
> We still want to add kvm_reload_remote_mmus() to
> kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
> optimization here. So may be skipping obsolete pages while walking
> hashtable is better solution.

 I am willing to use this way instead, but it looks worse than this
 patch:

 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 09:03:50PM +0800, Xiao Guangrong wrote:
> On 05/23/2013 08:39 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 04:09 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
>  On 05/23/2013 03:37 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>  On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> >> It is only used to zap obsolete pages. Since an obsolete page will
> >> not be used again, we need not spend time finding its unsync children.
> >> Also, we delete the page from the shadow page cache so that the page
> >> is completely isolated after calling this function.
> >>
> >> A later patch will use it to collapse TLB flushes.
> >>
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/kvm/mmu.c |   46 
> >> +-
> >>  1 files changed, 41 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> >> index 9b57faa..e676356 100644
> >> --- a/arch/x86/kvm/mmu.c
> >> +++ b/arch/x86/kvm/mmu.c
> >> @@ -1466,7 +1466,7 @@ static inline void 
> >> kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
> >>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
> >>  {
> >>ASSERT(is_empty_shadow_page(sp->spt));
> >> -  hlist_del(&sp->hash_link);
> >> +  hlist_del_init(&sp->hash_link);
> > Why do you need hlist_del_init() here? Why not move it into
> 
>  Since the hlist will be double freed. We will it like this:
> 
>  kvm_mmu_prepare_zap_obsolete_page(page, list);
>  kvm_mmu_commit_zap_page(list);
> kvm_mmu_free_page(page);
> 
>  The first place is kvm_mmu_prepare_zap_obsolete_page(page), which 
>  have
>  deleted the hash list.
> 
> > kvm_mmu_prepare_zap_page() like we discussed it here:
> > https://patchwork.kernel.org/patch/2580351/ instead of doing
> > it differently for obsolete and non obsolete pages?
> 
>  It is can break the hash-list walking: we should rescan the
>  hash list once the page is prepared-ly zapped.
> 
>  I mentioned it in the changelog:
> 
>    4): drop the patch which deleted page from hash list at the 
>  "prepare"
>    time since it can break the walk based on hash list.
> >>> Can you elaborate on how this can happen?
> >>
> >> There is a example:
> >>
> >> int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
> >> {
> >>struct kvm_mmu_page *sp;
> >>LIST_HEAD(invalid_list);
> >>int r;
> >>
> >>pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
> >>r = 0;
> >>spin_lock(&kvm->mmu_lock);
> >>for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
> >>pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
> >> sp->role.word);
> >>r = 1;
> >>kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
> >>}
> >>kvm_mmu_commit_zap_page(kvm, &invalid_list);
> >>spin_unlock(&kvm->mmu_lock);
> >>
> >>return r;
> >> }
> >>
> >> It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
> >> list.
> >> If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
> >> should
> >> be changed to:
> >>
> >> restart:
> >>for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
> >>pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
> >> sp->role.word);
> >>r = 1;
> >>if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
> >>goto restart;
> >>}
> >>kvm_mmu_commit_zap_page(kvm, &invalid_list);
> >>
> > Hmm, yes. So lets leave it as is and always commit invalid_list before
> 
>  So, you mean drop this patch and the patch of
>  KVM: MMU: collapse TLB flushes when zap all pages?
> 
> >>> We still want to add kvm_reload_remote_mmus() to
> >>> kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
> >>> optimization here. So may be skipping obsolete pages while walking
> >>> hashtable is better solution.
> >>
> >> I am willing to use this way instead, but it looks worse than this
> >> patch:
> >>
> >> diff --git a/arch/x86/kvm/mmu.c 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 08:39 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 04:09 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 03:37 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 02:18 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>> It is only used to zap the obsolete page. Since the obsolete page
>> will not be used, we need not spend time to find its unsync children
>> out. Also, we delete the page from shadow page cache so that the page
>> is completely isolated after call this function.
>>
>> The later patch will use it to collapse tlb flushes
>>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/kvm/mmu.c |   46 
>> +-
>>  1 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9b57faa..e676356 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1466,7 +1466,7 @@ static inline void 
>> kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
>>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>>  {
>>  ASSERT(is_empty_shadow_page(sp->spt));
>> -hlist_del(&sp->hash_link);
>> +hlist_del_init(&sp->hash_link);
> Why do you need hlist_del_init() here? Why not move it into

 Since the hlist will be double freed. We will it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
 deleted the hash list.

> kvm_mmu_prepare_zap_page() like we discussed it here:
> https://patchwork.kernel.org/patch/2580351/ instead of doing
> it differently for obsolete and non obsolete pages?

 It is can break the hash-list walking: we should rescan the
 hash list once the page is prepared-ly zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the "prepare"
   time since it can break the walk based on hash list.
>>> Can you elaborate on how this can happen?
>>
>> There is a example:
>>
>> int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
>> {
>>  struct kvm_mmu_page *sp;
>>  LIST_HEAD(invalid_list);
>>  int r;
>>
>>  pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
>>  r = 0;
>>  spin_lock(&kvm->mmu_lock);
>>  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>>  pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>>   sp->role.word);
>>  r = 1;
>>  kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
>>  }
>>  kvm_mmu_commit_zap_page(kvm, &invalid_list);
>>  spin_unlock(&kvm->mmu_lock);
>>
>>  return r;
>> }
>>
>> It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
>> list.
>> If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
>> should
>> be changed to:
>>
>> restart:
>>  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>>  pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>>   sp->role.word);
>>  r = 1;
>>  if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
>>  goto restart;
>>  }
>>  kvm_mmu_commit_zap_page(kvm, &invalid_list);
>>
> Hmm, yes. So lets leave it as is and always commit invalid_list before

 So, you mean drop this patch and the patch of
 KVM: MMU: collapse TLB flushes when zap all pages?

>>> We still want to add kvm_reload_remote_mmus() to
>>> kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
>>> optimization here. So may be skipping obsolete pages while walking
>>> hashtable is better solution.
>>
>> I am willing to use this way instead, but it looks worse than this
>> patch:
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9b57faa..810410c 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
>> *kvm, int nr)
>>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>>  {
>>  ASSERT(is_empty_shadow_page(sp->spt));
>> -hlist_del(&sp->hash_link);
>> +hlist_del_init(&sp->hash_link);
> Why not drop this
> 
>>  list_del(&sp->link);
>>  free_page((unsigned long)sp->spt);
>>  

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
> On 05/23/2013 04:09 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 03:37 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
>  On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>  It is only used to zap the obsolete page. Since the obsolete page
>  will not be used, we need not spend time to find its unsync children
>  out. Also, we delete the page from shadow page cache so that the page
>  is completely isolated after call this function.
> 
>  The later patch will use it to collapse tlb flushes
> 
>  Signed-off-by: Xiao Guangrong 
>  ---
>   arch/x86/kvm/mmu.c |   46 
>  +-
>   1 files changed, 41 insertions(+), 5 deletions(-)
> 
>  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>  index 9b57faa..e676356 100644
>  --- a/arch/x86/kvm/mmu.c
>  +++ b/arch/x86/kvm/mmu.c
>  @@ -1466,7 +1466,7 @@ static inline void 
>  kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
>   static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>   {
>   ASSERT(is_empty_shadow_page(sp->spt));
>  -hlist_del(&sp->hash_link);
>  +hlist_del_init(&sp->hash_link);
> >>> Why do you need hlist_del_init() here? Why not move it into
> >>
> >> Since the hlist will be double freed. We will it like this:
> >>
> >> kvm_mmu_prepare_zap_obsolete_page(page, list);
> >> kvm_mmu_commit_zap_page(list);
> >>kvm_mmu_free_page(page);
> >>
> >> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
> >> deleted the hash list.
> >>
> >>> kvm_mmu_prepare_zap_page() like we discussed it here:
> >>> https://patchwork.kernel.org/patch/2580351/ instead of doing
> >>> it differently for obsolete and non obsolete pages?
> >>
> >> It is can break the hash-list walking: we should rescan the
> >> hash list once the page is prepared-ly zapped.
> >>
> >> I mentioned it in the changelog:
> >>
> >>   4): drop the patch which deleted page from hash list at the "prepare"
> >>   time since it can break the walk based on hash list.
> > Can you elaborate on how this can happen?
> 
>  There is a example:
> 
>  int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
>  {
>   struct kvm_mmu_page *sp;
>   LIST_HEAD(invalid_list);
>   int r;
> 
>   pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
>   r = 0;
>   spin_lock(&kvm->mmu_lock);
>   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>   pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>    sp->role.word);
>   r = 1;
>   kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
>   }
>   kvm_mmu_commit_zap_page(kvm, &invalid_list);
>   spin_unlock(&kvm->mmu_lock);
> 
>   return r;
>  }
> 
>  It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
>  list.
>  If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
>  should
>  be changed to:
> 
>  restart:
>   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>   pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>    sp->role.word);
>   r = 1;
>   if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
>   goto restart;
>   }
>   kvm_mmu_commit_zap_page(kvm, &invalid_list);
> 
> >>> Hmm, yes. So lets leave it as is and always commit invalid_list before
> >>
> >> So, you mean drop this patch and the patch of
> >> KVM: MMU: collapse TLB flushes when zap all pages?
> >>
> > We still want to add kvm_reload_remote_mmus() to
> > kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
> > optimization here. So may be skipping obsolete pages while walking
> > hashtable is better solution.
> 
> I am willing to use this way instead, but it looks worse than this
> patch:
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9b57faa..810410c 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
> *kvm, int nr)
>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  {
>   ASSERT(is_empty_shadow_page(sp->spt));
> - hlist_del(&sp->hash_link);
> + hlist_del_init(&sp->hash_link);
Why not drop this

>   list_del(&sp->link);
>   free_page((unsigned long)sp->spt);
>   if (!sp->role.direct)
> @@ -1648,14 +1648,20 @@ static int 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 04:09 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 03:37 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
 kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
>>> Why do you need hlist_del_init() here? Why not move it into
>>
>> Since the hlist will be double freed. We will it like this:
>>
>> kvm_mmu_prepare_zap_obsolete_page(page, list);
>> kvm_mmu_commit_zap_page(list);
>>kvm_mmu_free_page(page);
>>
>> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
>> deleted the hash list.
>>
>>> kvm_mmu_prepare_zap_page() like we discussed it here:
>>> https://patchwork.kernel.org/patch/2580351/ instead of doing
>>> it differently for obsolete and non obsolete pages?
>>
>> It is can break the hash-list walking: we should rescan the
>> hash list once the page is prepared-ly zapped.
>>
>> I mentioned it in the changelog:
>>
>>   4): drop the patch which deleted page from hash list at the "prepare"
>>   time since it can break the walk based on hash list.
> Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);

>>> Hmm, yes. So lets leave it as is and always commit invalid_list before
>>
>> So, you mean drop this patch and the patch of
>> KVM: MMU: collapse TLB flushes when zap all pages?
>>
> We still want to add kvm_reload_remote_mmus() to
> kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
> optimization here. So may be skipping obsolete pages while walking
> hashtable is better solution.

I am willing to use this way instead, but it looks worse than this
patch:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b57faa..810410c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
*kvm, int nr)
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
ASSERT(is_empty_shadow_page(sp->spt));
-   hlist_del(&sp->hash_link);
+   hlist_del_init(&sp->hash_link);
list_del(&sp->link);
free_page((unsigned long)sp->spt);
if (!sp->role.direct)
@@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);

+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+   return 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 04:09 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 03:37 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
 kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
>>> Why do you need hlist_del_init() here? Why not move it into
>>
>> Since the hlist will be double freed. We will it like this:
>>
>> kvm_mmu_prepare_zap_obsolete_page(page, list);
>> kvm_mmu_commit_zap_page(list);
>>kvm_mmu_free_page(page);
>>
>> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
>> deleted the hash list.
>>
>>> kvm_mmu_prepare_zap_page() like we discussed it here:
>>> https://patchwork.kernel.org/patch/2580351/ instead of doing
>>> it differently for obsolete and non obsolete pages?
>>
>> It is can break the hash-list walking: we should rescan the
>> hash list once the page is prepared-ly zapped.
>>
>> I mentioned it in the changelog:
>>
>>   4): drop the patch which deleted page from hash list at the "prepare"
>>   time since it can break the walk based on hash list.
> Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 sp->role.word);
r = 1;
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);

>>> Hmm, yes. So lets leave it as is and always commit invalid_list before
>>
>> So, you mean drop this patch and the patch of
>> KVM: MMU: collapse TLB flushes when zap all pages?
>>
> We still want to add kvm_reload_remote_mmus() to
> kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
> optimization here. So may be skipping obsolete pages while walking
> hashtable is better solution.

Okay.

Will update this patch and the later patch.

> 
>> But, we only introduced less code in this patch, most of them is reusing
>> the code of __kvm_mmu_prepare_zap_page...
>>
>> Furthermore, maybe not related to this patch, i do not think calling
>> mmu_zap_unsync_children() in kvm_mmu_prepare_zap_page() is necessary,
>> but i need to test it very carefully. Why not let
>> kvm_mmu_prepare_zap_obsolete_page for the first step? :(
> 
> Yes, I want Marcelo opinion on skipping mmu_zap_unsync_children() first.

Okay. Thank you!




Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
> On 05/23/2013 03:37 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>  On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> >> It is only used to zap the obsolete page. Since the obsolete page
> >> will not be used, we need not spend time to find its unsync children
> >> out. Also, we delete the page from shadow page cache so that the page
> >> is completely isolated after call this function.
> >>
> >> The later patch will use it to collapse tlb flushes
> >>
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/kvm/mmu.c |   46 
> >> +-
> >>  1 files changed, 41 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> >> index 9b57faa..e676356 100644
> >> --- a/arch/x86/kvm/mmu.c
> >> +++ b/arch/x86/kvm/mmu.c
> >> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
> >> kvm *kvm, int nr)
> >>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
> >>  {
> >>ASSERT(is_empty_shadow_page(sp->spt));
> >> -  hlist_del(&sp->hash_link);
> >> +  hlist_del_init(&sp->hash_link);
> > Why do you need hlist_del_init() here? Why not move it into
> 
>  Since the hlist will be double freed. We will it like this:
> 
>  kvm_mmu_prepare_zap_obsolete_page(page, list);
>  kvm_mmu_commit_zap_page(list);
> kvm_mmu_free_page(page);
> 
>  The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
>  deleted the hash list.
> 
> > kvm_mmu_prepare_zap_page() like we discussed it here:
> > https://patchwork.kernel.org/patch/2580351/ instead of doing
> > it differently for obsolete and non obsolete pages?
> 
>  It is can break the hash-list walking: we should rescan the
>  hash list once the page is prepared-ly zapped.
> 
>  I mentioned it in the changelog:
> 
>    4): drop the patch which deleted page from hash list at the "prepare"
>    time since it can break the walk based on hash list.
> >>> Can you elaborate on how this can happen?
> >>
> >> There is a example:
> >>
> >> int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
> >> {
> >>struct kvm_mmu_page *sp;
> >>LIST_HEAD(invalid_list);
> >>int r;
> >>
> >>pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
> >>r = 0;
> >>spin_lock(&kvm->mmu_lock);
> >>for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
> >>pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
> >> sp->role.word);
> >>r = 1;
> >>kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
> >>}
> >>kvm_mmu_commit_zap_page(kvm, &invalid_list);
> >>spin_unlock(&kvm->mmu_lock);
> >>
> >>return r;
> >> }
> >>
> >> It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
> >> If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
> >> be changed to:
> >>
> >> restart:
> >>for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
> >>pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
> >> sp->role.word);
> >>r = 1;
> >>if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
> >>goto restart;
> >>}
> >>kvm_mmu_commit_zap_page(kvm, &invalid_list);
> >>
> > Hmm, yes. So lets leave it as is and always commit invalid_list before
> 
> So, you mean drop this patch and the patch of
> KVM: MMU: collapse TLB flushes when zap all pages?
> 
We still want to add kvm_reload_remote_mmus() to
kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
optimization here. So may be skipping obsolete pages while walking
hashtable is better solution.
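For reference, adding kvm_reload_remote_mmus() to
kvm_mmu_invalidate_zap_all_pages() means roughly the following (a sketch
assuming the mmu_valid_gen generation counter introduced elsewhere in this
series; not the final committed code):

void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
{
	spin_lock(&kvm->mmu_lock);

	/* Every shadow page created before this point becomes obsolete. */
	kvm->arch.mmu_valid_gen++;

	/*
	 * Force all vcpus to reload their roots before running again, so no
	 * vcpu keeps using an obsolete shadow page table while it is zapped.
	 */
	kvm_reload_remote_mmus(kvm);

	kvm_zap_obsolete_pages(kvm);
	spin_unlock(&kvm->mmu_lock);
}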

> But, we only introduced less code in this patch, most of them is reusing
> the code of __kvm_mmu_prepare_zap_page...
> 
> Furthermore, maybe not related to this patch, i do not think calling
> mmu_zap_unsync_children() in kvm_mmu_prepare_zap_page() is necessary,
> but i need to test it very carefully. Why not let
> kvm_mmu_prepare_zap_obsolete_page for the first step? :(

Yes, I want Marcelo opinion on skipping mmu_zap_unsync_children() first.

> > releasing lock in kvm_zap_obsolete_pages() or skip obsolete pages while
> > walking hash table. Former is clearer I think.
> > 
> > --
> > Gleb.
> > 
> > 
> > 

--
Gleb.


Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 03:37 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 02:18 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>> It is only used to zap the obsolete page. Since the obsolete page
>> will not be used, we need not spend time to find its unsync children
>> out. Also, we delete the page from shadow page cache so that the page
>> is completely isolated after call this function.
>>
>> The later patch will use it to collapse tlb flushes
>>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/kvm/mmu.c |   46 +-
>>  1 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9b57faa..e676356 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
>> kvm *kvm, int nr)
>>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>>  {
>>  ASSERT(is_empty_shadow_page(sp->spt));
>> -hlist_del(>hash_link);
>> +hlist_del_init(>hash_link);
> Why do you need hlist_del_init() here? Why not move it into

 Since the hlist will be double freed. We will it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
 deleted the hash list.

> kvm_mmu_prepare_zap_page() like we discussed it here:
> https://patchwork.kernel.org/patch/2580351/ instead of doing
> it differently for obsolete and non obsolete pages?

 It is can break the hash-list walking: we should rescan the
 hash list once the page is prepared-ly zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the "prepare"
   time since it can break the walk based on hash list.
>>> Can you elaborate on how this can happen?
>>
>> There is a example:
>>
>> int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
>> {
>>  struct kvm_mmu_page *sp;
>>  LIST_HEAD(invalid_list);
>>  int r;
>>
>>  pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
>>  r = 0;
>>  spin_lock(>mmu_lock);
>>  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>>  pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>>   sp->role.word);
>>  r = 1;
>>  kvm_mmu_prepare_zap_page(kvm, sp, _list);
>>  }
>>  kvm_mmu_commit_zap_page(kvm, _list);
>>  spin_unlock(>mmu_lock);
>>
>>  return r;
>> }
>>
>> It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
>> If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
>> be changed to:
>>
>> restart:
>>  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>>  pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>>   sp->role.word);
>>  r = 1;
>>  if (kvm_mmu_prepare_zap_page(kvm, sp, _list))
>>  goto restart;
>>  }
>>  kvm_mmu_commit_zap_page(kvm, _list);
>>
> Hmm, yes. So lets leave it as is and always commit invalid_list before

So, you mean to drop this patch and the patch
"KVM: MMU: collapse TLB flushes when zap all pages"?

But this patch only introduces a little new code; most of it is reusing
the code of __kvm_mmu_prepare_zap_page()...

Furthermore, though maybe not related to this patch, I do not think calling
mmu_zap_unsync_children() in kvm_mmu_prepare_zap_page() is necessary,
but I need to test that very carefully. Why not keep
kvm_mmu_prepare_zap_obsolete_page() as the first step? :(

> releasing lock in kvm_zap_obsolete_pages() or skip obsolete pages while
> walking hash table. Former is clearer I think.
> 
> --
>   Gleb.
> 
> 
> 



Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
> On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
> >> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> >>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>  It is only used to zap the obsolete page. Since the obsolete page
>  will not be used, we need not spend time to find its unsync children
>  out. Also, we delete the page from shadow page cache so that the page
>  is completely isolated after call this function.
> 
>  The later patch will use it to collapse tlb flushes
> 
>  Signed-off-by: Xiao Guangrong 
>  ---
>   arch/x86/kvm/mmu.c |   46 +-
>   1 files changed, 41 insertions(+), 5 deletions(-)
> 
>  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>  index 9b57faa..e676356 100644
>  --- a/arch/x86/kvm/mmu.c
>  +++ b/arch/x86/kvm/mmu.c
>  @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
>  kvm *kvm, int nr)
>   static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>   {
>   ASSERT(is_empty_shadow_page(sp->spt));
>  -hlist_del(>hash_link);
>  +hlist_del_init(>hash_link);
> >>> Why do you need hlist_del_init() here? Why not move it into
> >>
> >> Since the hlist will be double freed. We will it like this:
> >>
> >> kvm_mmu_prepare_zap_obsolete_page(page, list);
> >> kvm_mmu_commit_zap_page(list);
> >>kvm_mmu_free_page(page);
> >>
> >> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
> >> deleted the hash list.
> >>
> >>> kvm_mmu_prepare_zap_page() like we discussed it here:
> >>> https://patchwork.kernel.org/patch/2580351/ instead of doing
> >>> it differently for obsolete and non obsolete pages?
> >>
> >> It is can break the hash-list walking: we should rescan the
> >> hash list once the page is prepared-ly zapped.
> >>
> >> I mentioned it in the changelog:
> >>
> >>   4): drop the patch which deleted page from hash list at the "prepare"
> >>   time since it can break the walk based on hash list.
> > Can you elaborate on how this can happen?
> 
> There is a example:
> 
> int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
> {
>   struct kvm_mmu_page *sp;
>   LIST_HEAD(invalid_list);
>   int r;
> 
>   pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
>   r = 0;
>   spin_lock(>mmu_lock);
>   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>   pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>sp->role.word);
>   r = 1;
>   kvm_mmu_prepare_zap_page(kvm, sp, _list);
>   }
>   kvm_mmu_commit_zap_page(kvm, _list);
>   spin_unlock(>mmu_lock);
> 
>   return r;
> }
> 
> It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
> If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
> be changed to:
> 
> restart:
>   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
>   pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
>sp->role.word);
>   r = 1;
>   if (kvm_mmu_prepare_zap_page(kvm, sp, _list))
>   goto restart;
>   }
>   kvm_mmu_commit_zap_page(kvm, _list);
> 
Hmm, yes. So let's either leave it as is and always commit invalid_list before
releasing the lock in kvm_zap_obsolete_pages(), or skip obsolete pages while
walking the hash table. The former is clearer, I think.
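
A minimal sketch of the former option follows. Only the function names come
from this thread; the loop over active_mmu_pages, the resched check, and the
restart label are assumptions about what kvm_zap_obsolete_pages() would look
like, not the posted code. The idea is simply that everything on invalid_list
is committed, and therefore TLB-flushed, before mmu_lock can ever be dropped:

static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
	struct kvm_mmu_page *sp, *node;
	LIST_HEAD(invalid_list);

restart:
	list_for_each_entry_safe_reverse(sp, node,
	      &kvm->arch.active_mmu_pages, link) {
		if (!is_obsolete_sp(kvm, sp))
			break;

		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
			goto restart;

		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
			/* Commit (and flush) before the lock is released. */
			kvm_mmu_commit_zap_page(kvm, &invalid_list);
			cond_resched_lock(&kvm->mmu_lock);
			goto restart;
		}
	}

	kvm_mmu_commit_zap_page(kvm, &invalid_list);
}

The cost is the one noted above: committing at every possible lock-break point
gives up the chance to batch the TLB flushes across the whole zap, which is
what the "collapse TLB flushes" patch was trying to win back.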

--
Gleb.


Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 02:18 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
>> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
>>> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong 
 ---
  arch/x86/kvm/mmu.c |   46 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(>hash_link);
 +  hlist_del_init(>hash_link);
>>> Why do you need hlist_del_init() here? Why not move it into
>>
>> Since the hlist will be double freed. We will it like this:
>>
>> kvm_mmu_prepare_zap_obsolete_page(page, list);
>> kvm_mmu_commit_zap_page(list);
>>kvm_mmu_free_page(page);
>>
>> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
>> deleted the hash list.
>>
>>> kvm_mmu_prepare_zap_page() like we discussed it here:
>>> https://patchwork.kernel.org/patch/2580351/ instead of doing
>>> it differently for obsolete and non obsolete pages?
>>
>> It is can break the hash-list walking: we should rescan the
>> hash list once the page is prepared-ly zapped.
>>
>> I mentioned it in the changelog:
>>
>>   4): drop the patch which deleted page from hash list at the "prepare"
>>   time since it can break the walk based on hash list.
> Can you elaborate on how this can happen?

Here is an example:

int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_mmu_page *sp;
	LIST_HEAD(invalid_list);
	int r;

	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
	r = 0;
	spin_lock(&kvm->mmu_lock);
	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
			 sp->role.word);
		r = 1;
		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
	}
	kvm_mmu_commit_zap_page(kvm, &invalid_list);
	spin_unlock(&kvm->mmu_lock);

	return r;
}

It works fine since kvm_mmu_prepare_zap_page() does not touch the hash list.
If we deleted the hlist entry in kvm_mmu_prepare_zap_page(), this kind of code
would have to rescan the bucket after every successful zap
(for_each_gfn_indirect_valid_sp is a plain hlist walk, so unlinking entries
underneath it breaks the iteration):

restart:
	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
			 sp->role.word);
		r = 1;
		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
			goto restart;
	}
	kvm_mmu_commit_zap_page(kvm, &invalid_list);
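
For what it is worth, the breakage is easy to reproduce outside the kernel.
Below is a toy user-space model; the mini-hlist is a simplification of
include/linux/list.h, and prepare_zap() merely stands in for a
kvm_mmu_prepare_zap_page() that also unlinks the hash entry:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sp_stub { int gfn; struct hlist_node hash_link; };

static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	n->next = h->first;
	if (h->first)
		h->first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

static void hlist_del_init(struct hlist_node *n)
{
	if (!n->pprev)			/* already unhashed */
		return;
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
	n->next = NULL;
	n->pprev = NULL;
}

/* Stand-in for a prepare-zap that also unlinks the page from its bucket. */
static int prepare_zap(struct sp_stub *sp)
{
	hlist_del_init(&sp->hash_link);
	return 1;
}

int main(void)
{
	struct hlist_head bucket = { NULL };
	struct sp_stub sp[3] = { { 10 }, { 11 }, { 12 } };
	struct hlist_node *pos;
	int zapped = 0;

	for (int i = 0; i < 3; i++)
		hlist_add_head(&sp[i].hash_link, &bucket);

	/* Naive walk, like for_each_gfn_indirect_valid_sp(): it stops after
	 * the first zap because the current node's ->next was just cleared. */
	for (pos = bucket.first; pos; pos = pos->next)
		zapped += prepare_zap(container_of(pos, struct sp_stub, hash_link));
	printf("naive walk zapped %d of 3\n", zapped);		/* prints 1 */

	/* Restart-style walk, as in the reworked loop above. */
restart:
	for (pos = bucket.first; pos; pos = pos->next) {
		zapped += prepare_zap(container_of(pos, struct sp_stub, hash_link));
		goto restart;
	}
	printf("with restarts, %d of 3 zapped in total\n", zapped);	/* prints 3 */

	assert(bucket.first == NULL);
	return 0;
}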




Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
> On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> > On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> >> It is only used to zap the obsolete page. Since the obsolete page
> >> will not be used, we need not spend time to find its unsync children
> >> out. Also, we delete the page from shadow page cache so that the page
> >> is completely isolated after call this function.
> >>
> >> The later patch will use it to collapse tlb flushes
> >>
> >> Signed-off-by: Xiao Guangrong 
> >> ---
> >>  arch/x86/kvm/mmu.c |   46 +-
> >>  1 files changed, 41 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> >> index 9b57faa..e676356 100644
> >> --- a/arch/x86/kvm/mmu.c
> >> +++ b/arch/x86/kvm/mmu.c
> >> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
> >> *kvm, int nr)
> >>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
> >>  {
> >>ASSERT(is_empty_shadow_page(sp->spt));
> >> -  hlist_del(>hash_link);
> >> +  hlist_del_init(>hash_link);
> > Why do you need hlist_del_init() here? Why not move it into
> 
> Since the hlist will be double freed. We will it like this:
> 
> kvm_mmu_prepare_zap_obsolete_page(page, list);
> kvm_mmu_commit_zap_page(list);
>kvm_mmu_free_page(page);
> 
> The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
> deleted the hash list.
> 
> > kvm_mmu_prepare_zap_page() like we discussed it here:
> > https://patchwork.kernel.org/patch/2580351/ instead of doing
> > it differently for obsolete and non obsolete pages?
> 
> It is can break the hash-list walking: we should rescan the
> hash list once the page is prepared-ly zapped.
> 
> I mentioned it in the changelog:
> 
>   4): drop the patch which deleted page from hash list at the "prepare"
>   time since it can break the walk based on hash list.
Can you elaborate on how this can happen?

--
Gleb.


Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 01:57 PM, Gleb Natapov wrote:
> On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
>> It is only used to zap the obsolete page. Since the obsolete page
>> will not be used, we need not spend time to find its unsync children
>> out. Also, we delete the page from shadow page cache so that the page
>> is completely isolated after call this function.
>>
>> The later patch will use it to collapse tlb flushes
>>
>> Signed-off-by: Xiao Guangrong 
>> ---
>>  arch/x86/kvm/mmu.c |   46 +-
>>  1 files changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9b57faa..e676356 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
>> *kvm, int nr)
>>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>>  {
>>  ASSERT(is_empty_shadow_page(sp->spt));
>> -hlist_del(>hash_link);
>> +hlist_del_init(>hash_link);
> Why do you need hlist_del_init() here? Why not move it into

Because otherwise the hlist entry would be deleted twice. The call sequence is:

kvm_mmu_prepare_zap_obsolete_page(page, list);
kvm_mmu_commit_zap_page(list);
	kvm_mmu_free_page(page);

The first deletion happens in kvm_mmu_prepare_zap_obsolete_page(page), which has
already removed the page from the hash list.

> kvm_mmu_prepare_zap_page() like we discussed it here:
> https://patchwork.kernel.org/patch/2580351/ instead of doing
> it differently for obsolete and non obsolete pages?

It can break the hash-list walking: we would have to rescan the
hash list each time a page is zapped at "prepare" time.

I mentioned it in the changelog:

  4): drop the patch which deleted page from hash list at the "prepare"
  time since it can break the walk based on hash list.
> 
>>  list_del(>link);
>>  free_page((unsigned long)sp->spt);
>>  if (!sp->role.direct)
>> @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  return zapped;
>>  }
>>  
>> -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page 
>> *sp,
>> -struct list_head *invalid_list)
>> +static int
>> +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>> +   bool zap_unsync_children,
>> +   struct list_head *invalid_list)
>>  {
>> -int ret;
>> +int ret = 0;
>>  
>>  trace_kvm_mmu_prepare_zap_page(sp);
>>  ++kvm->stat.mmu_shadow_zapped;
>> -ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
>> +
>> +if (likely(zap_unsync_children))
>> +ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
>> +
>>  kvm_mmu_page_unlink_children(kvm, sp);
>>  kvm_mmu_unlink_parents(kvm, sp);
>>  
>> @@ -2099,6 +2104,37 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
>> struct kvm_mmu_page *sp,
>>  return ret;
>>  }
>>  
>> +/*
>> + * The obsolete page will not be used, we need not spend time to find
>> + * its unsync children out. Also, we delete the page from shadow page
>> + * cache so that the page is completely isolated after call this
>> + * function.
>> + *
>> + * Note: if we use this function in for_each_gfn_xxx macros, we should
>> + * re-walk the list when it successfully zaps one page.
>> + */
>> +static int
>> +kvm_mmu_prepare_zap_obsolete_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>> +  struct list_head *invalid_list)
>> +{
>> +int ret;
>> +
>> +WARN_ON(!is_obsolete_sp(kvm, sp));
>> +
>> +ret = __kvm_mmu_prepare_zap_page(kvm, sp, false, invalid_list);
>> +if (ret)
>> +hlist_del_init(>hash_link);
> Why hlist_del() is not enough?

Because it will be deleted again in kvm_mmu_free_page().
I am not sure whether there is a better way to do this.
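
A small user-space sketch of why the _init variant tolerates that second
deletion; this is a simplified model of include/linux/list.h, and in the real
thing hlist_del() poisons the node's pointers, so deleting the same node twice
with it would oops:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };

static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	n->next = h->first;
	if (h->first)
		h->first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

static void hlist_del_init(struct hlist_node *n)
{
	if (!n->pprev)			/* already unhashed: nothing to do */
		return;
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
	n->next = NULL;
	n->pprev = NULL;
}

int main(void)
{
	struct hlist_head hash_bucket = { NULL };
	struct hlist_node hash_link;

	hlist_add_head(&hash_link, &hash_bucket);

	/* 1st deletion: kvm_mmu_prepare_zap_obsolete_page() isolates the page. */
	hlist_del_init(&hash_link);
	/* 2nd deletion: kvm_mmu_free_page() runs later, after the commit. */
	hlist_del_init(&hash_link);

	assert(hash_bucket.first == NULL);
	printf("double hlist_del_init() is safe\n");
	return 0;
}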



Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 04:09 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 03:37 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
 kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp-spt));
 -  hlist_del(sp-hash_link);
 +  hlist_del_init(sp-hash_link);
 Why do you need hlist_del_init() here? Why not move it into

 Since the hlist will be double freed. We will it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
 deleted the hash list.

 kvm_mmu_prepare_zap_page() like we discussed it here:
 https://patchwork.kernel.org/patch/2580351/ instead of doing
 it differently for obsolete and non obsolete pages?

 It is can break the hash-list walking: we should rescan the
 hash list once the page is prepared-ly zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the prepare
   time since it can break the walk based on hash list.
 Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

pgprintk(%s: looking for gfn %llx\n, __func__, gfn);
r = 0;
spin_lock(kvm-mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
 sp-role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
}
kvm_mmu_commit_zap_page(kvm, invalid_list);
spin_unlock(kvm-mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
 sp-role.word);
r = 1;
if (kvm_mmu_prepare_zap_page(kvm, sp, invalid_list))
goto restart;
}
kvm_mmu_commit_zap_page(kvm, invalid_list);

 Hmm, yes. So lets leave it as is and always commit invalid_list before

 So, you mean drop this patch and the patch of
 KVM: MMU: collapse TLB flushes when zap all pages?

 We still want to add kvm_reload_remote_mmus() to
 kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
 optimization here. So may be skipping obsolete pages while walking
 hashtable is better solution.

Okay.

Will update this patch and the later patch.

 
 But, we only introduced less code in this patch, most of them is reusing
 the code of __kvm_mmu_prepare_zap_page...

 Furthermore, maybe not related to this patch, i do not think calling
 mmu_zap_unsync_children() in kvm_mmu_prepare_zap_page() is necessary,
 but i need to test it very carefully. Why not let
 kvm_mmu_prepare_zap_obsolete_page for the first step? :(
 
 Yes, I want Marcelo opinion on skipping mmu_zap_unsync_children() first.

Okay. Thank you!




Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 04:09 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 03:37 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct 
 kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
ASSERT(is_empty_shadow_page(sp-spt));
 -  hlist_del(sp-hash_link);
 +  hlist_del_init(sp-hash_link);
 Why do you need hlist_del_init() here? Why not move it into

 Since the hlist will be double freed. We will it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
 deleted the hash list.

 kvm_mmu_prepare_zap_page() like we discussed it here:
 https://patchwork.kernel.org/patch/2580351/ instead of doing
 it differently for obsolete and non obsolete pages?

 It is can break the hash-list walking: we should rescan the
 hash list once the page is prepared-ly zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the prepare
   time since it can break the walk based on hash list.
 Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

pgprintk(%s: looking for gfn %llx\n, __func__, gfn);
r = 0;
spin_lock(kvm-mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
 sp-role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
}
kvm_mmu_commit_zap_page(kvm, invalid_list);
spin_unlock(kvm-mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
 sp-role.word);
r = 1;
if (kvm_mmu_prepare_zap_page(kvm, sp, invalid_list))
goto restart;
}
kvm_mmu_commit_zap_page(kvm, invalid_list);

 Hmm, yes. So lets leave it as is and always commit invalid_list before

 So, you mean drop this patch and the patch of
 KVM: MMU: collapse TLB flushes when zap all pages?

 We still want to add kvm_reload_remote_mmus() to
 kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
 optimization here. So may be skipping obsolete pages while walking
 hashtable is better solution.

I am willing to use that approach instead, but it looks worse than this
patch:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b57faa..810410c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
-	hlist_del(&sp->hash_link);
+	hlist_del_init(&sp->hash_link);
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
 	if (!sp->role.direct)
@@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 #define for_each_gfn_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-		if ((_sp)->gfn != (_gfn)) {} else
+		if ((_sp)->gfn != (_gfn) || is_obsolete_sp(_kvm, _sp)) {} else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 04:09 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 03:37 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 02:18 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 01:57 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
  It is only used to zap the obsolete page. Since the obsolete page
  will not be used, we need not spend time to find its unsync children
  out. Also, we delete the page from shadow page cache so that the page
  is completely isolated after call this function.
 
  The later patch will use it to collapse tlb flushes
 
  Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
  ---
   arch/x86/kvm/mmu.c |   46 
  +-
   1 files changed, 41 insertions(+), 5 deletions(-)
 
  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
  index 9b57faa..e676356 100644
  --- a/arch/x86/kvm/mmu.c
  +++ b/arch/x86/kvm/mmu.c
  @@ -1466,7 +1466,7 @@ static inline void 
  kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
   static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
   {
   ASSERT(is_empty_shadow_page(sp-spt));
  -hlist_del(sp-hash_link);
  +hlist_del_init(sp-hash_link);
  Why do you need hlist_del_init() here? Why not move it into
 
  Since the hlist will be double freed. We will it like this:
 
  kvm_mmu_prepare_zap_obsolete_page(page, list);
  kvm_mmu_commit_zap_page(list);
 kvm_mmu_free_page(page);
 
  The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
  deleted the hash list.
 
  kvm_mmu_prepare_zap_page() like we discussed it here:
  https://patchwork.kernel.org/patch/2580351/ instead of doing
  it differently for obsolete and non obsolete pages?
 
  It is can break the hash-list walking: we should rescan the
  hash list once the page is prepared-ly zapped.
 
  I mentioned it in the changelog:
 
4): drop the patch which deleted page from hash list at the prepare
time since it can break the walk based on hash list.
  Can you elaborate on how this can happen?
 
  There is a example:
 
  int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
  {
   struct kvm_mmu_page *sp;
   LIST_HEAD(invalid_list);
   int r;
 
   pgprintk(%s: looking for gfn %llx\n, __func__, gfn);
   r = 0;
   spin_lock(kvm-mmu_lock);
   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
   pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
sp-role.word);
   r = 1;
   kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
   }
   kvm_mmu_commit_zap_page(kvm, invalid_list);
   spin_unlock(kvm-mmu_lock);
 
   return r;
  }
 
  It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
  list.
  If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
  should
  be changed to:
 
  restart:
   for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
   pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
sp-role.word);
   r = 1;
   if (kvm_mmu_prepare_zap_page(kvm, sp, invalid_list))
   goto restart;
   }
   kvm_mmu_commit_zap_page(kvm, invalid_list);
 
  Hmm, yes. So lets leave it as is and always commit invalid_list before
 
  So, you mean drop this patch and the patch of
  KVM: MMU: collapse TLB flushes when zap all pages?
 
  We still want to add kvm_reload_remote_mmus() to
  kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
  optimization here. So may be skipping obsolete pages while walking
  hashtable is better solution.
 
 I am willing to use this way instead, but it looks worse than this
 patch:
 
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..810410c 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
   ASSERT(is_empty_shadow_page(sp-spt));
 - hlist_del(sp-hash_link);
 + hlist_del_init(sp-hash_link);
Why not drop this

   list_del(sp-link);
   free_page((unsigned long)sp-spt);
   if (!sp-role.direct)
 @@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
 struct kvm_mmu_page *sp,
  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
   struct list_head *invalid_list);
 
 +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 +{
 + return unlikely(sp-mmu_valid_gen != kvm-arch.mmu_valid_gen);
 +}
 +
  #define for_each_gfn_sp(_kvm, _sp, _gfn) \
   hlist_for_each_entry(_sp,   \
 (_kvm)-arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
 - if ((_sp)-gfn != 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 08:39 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 04:09 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 03:37 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync children
 out. Also, we delete the page from shadow page cache so that the page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void 
 kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
  ASSERT(is_empty_shadow_page(sp-spt));
 -hlist_del(sp-hash_link);
 +hlist_del_init(sp-hash_link);
 Why do you need hlist_del_init() here? Why not move it into

 Since the hlist will be double freed. We will it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which have
 deleted the hash list.

 kvm_mmu_prepare_zap_page() like we discussed it here:
 https://patchwork.kernel.org/patch/2580351/ instead of doing
 it differently for obsolete and non obsolete pages?

 It is can break the hash-list walking: we should rescan the
 hash list once the page is prepared-ly zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the prepare
   time since it can break the walk based on hash list.
 Can you elaborate on how this can happen?

 There is a example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
  struct kvm_mmu_page *sp;
  LIST_HEAD(invalid_list);
  int r;

  pgprintk(%s: looking for gfn %llx\n, __func__, gfn);
  r = 0;
  spin_lock(kvm-mmu_lock);
  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
  pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
   sp-role.word);
  r = 1;
  kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
  }
  kvm_mmu_commit_zap_page(kvm, invalid_list);
  spin_unlock(kvm-mmu_lock);

  return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
 list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
 should
 be changed to:

 restart:
  for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
  pgprintk(%s: gfn %llx role %x\n, __func__, gfn,
   sp-role.word);
  r = 1;
  if (kvm_mmu_prepare_zap_page(kvm, sp, invalid_list))
  goto restart;
  }
  kvm_mmu_commit_zap_page(kvm, invalid_list);

 Hmm, yes. So lets leave it as is and always commit invalid_list before

 So, you mean drop this patch and the patch of
 KVM: MMU: collapse TLB flushes when zap all pages?

 We still want to add kvm_reload_remote_mmus() to
 kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
 optimization here. So may be skipping obsolete pages while walking
 hashtable is better solution.

 I am willing to use this way instead, but it looks worse than this
 patch:

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..810410c 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
  ASSERT(is_empty_shadow_page(sp-spt));
 -hlist_del(sp-hash_link);
 +hlist_del_init(sp-hash_link);
 Why not drop this
 
  list_del(sp-link);
  free_page((unsigned long)sp-spt);
  if (!sp-role.direct)
 @@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
 struct kvm_mmu_page *sp,
  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
  struct list_head *invalid_list);

 +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 +{
 +return unlikely(sp-mmu_valid_gen != kvm-arch.mmu_valid_gen);
 +}
 +
  #define for_each_gfn_sp(_kvm, _sp, _gfn)\
  hlist_for_each_entry(_sp,   \
(_kvm)-arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
 -if ((_sp)-gfn != (_gfn)) {} else
 +if ((_sp)-gfn != (_gfn) || 

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Gleb Natapov
On Thu, May 23, 2013 at 09:03:50PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 08:39 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 04:09 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 03:37 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 02:18 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
  On 05/23/2013 01:57 PM, Gleb Natapov wrote:
  On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
  It is only used to zap the obsolete page. Since the obsolete page
  will not be used, we need not spend time to find its unsync 
  children
  out. Also, we delete the page from shadow page cache so that the 
  page
  is completely isolated after call this function.
 
  The later patch will use it to collapse tlb flushes
 
  Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
  ---
   arch/x86/kvm/mmu.c |   46 
  +-
   1 files changed, 41 insertions(+), 5 deletions(-)
 
  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
  index 9b57faa..e676356 100644
  --- a/arch/x86/kvm/mmu.c
  +++ b/arch/x86/kvm/mmu.c
  @@ -1466,7 +1466,7 @@ static inline void 
  kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
   static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
   {
  ASSERT(is_empty_shadow_page(sp->spt));
  -  hlist_del(&sp->hash_link);
  +  hlist_del_init(&sp->hash_link);
  Why do you need hlist_del_init() here? Why not move it into
 
  Since the hlist entry would otherwise be deleted twice. We call it like this:
 
  kvm_mmu_prepare_zap_obsolete_page(page, list);
  kvm_mmu_commit_zap_page(list);
 kvm_mmu_free_page(page);
 
  The first place is kvm_mmu_prepare_zap_obsolete_page(page), which 
  have
  deleted the hash list.
 
  kvm_mmu_prepare_zap_page() like we discussed it here:
  https://patchwork.kernel.org/patch/2580351/ instead of doing
  it differently for obsolete and non obsolete pages?
 
  It can break the hash-list walking: we would have to rescan the
  hash list once a page has been prepared-zapped.
 
  I mentioned it in the changelog:
 
4): drop the patch which deleted page from hash list at the 
  prepare
time since it can break the walk based on hash list.
  Can you elaborate on how this can happen?
 
  Here is an example:
 
  int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
  {
 struct kvm_mmu_page *sp;
 LIST_HEAD(invalid_list);
 int r;
 
 pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 r = 0;
 spin_lock(&kvm->mmu_lock);
 for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
  sp->role.word);
 r = 1;
 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 }
 kvm_mmu_commit_zap_page(kvm, &invalid_list);
 spin_unlock(&kvm->mmu_lock);
 
 return r;
  }
 
  It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
  list.
  If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
  should
  be changed to:
 
  restart:
 for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
  sp->role.word);
 r = 1;
 if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
 goto restart;
 }
 kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
  Hmm, yes. So lets leave it as is and always commit invalid_list before
 
  So, you mean drop this patch and the patch of
  KVM: MMU: collapse TLB flushes when zap all pages?
 
  We still want to add kvm_reload_remote_mmus() to
  kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
  optimization here. So may be skipping obsolete pages while walking
  hashtable is better solution.
 
  I am willing to use this way instead, but it looks worse than this
  patch:
 
  diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
  index 9b57faa..810410c 100644
  --- a/arch/x86/kvm/mmu.c
  +++ b/arch/x86/kvm/mmu.c
  @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
  *kvm, int nr)
   static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
   {
 ASSERT(is_empty_shadow_page(sp->spt));
  -  hlist_del(&sp->hash_link);
  +  hlist_del_init(&sp->hash_link);
  Why not drop this
  
 list_del(&sp->link);
 free_page((unsigned long)sp->spt);
 if (!sp->role.direct)
  @@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm 
  *kvm, struct kvm_mmu_page *sp,
   static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 struct list_head *invalid_list);
 
  +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
  +{
  +  return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
  

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/23/2013 11:57 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 09:03:50PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 08:39 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 07:13:58PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 04:09 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:50:16PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 03:37 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:31:47PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 02:18 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 02:13:06PM +0800, Xiao Guangrong wrote:
 On 05/23/2013 01:57 PM, Gleb Natapov wrote:
 On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
 It is only used to zap the obsolete page. Since the obsolete page
 will not be used, we need not spend time to find its unsync 
 children
 out. Also, we delete the page from shadow page cache so that the 
 page
 is completely isolated after call this function.

 The later patch will use it to collapse tlb flushes

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/kvm/mmu.c |   46 
 +-
  1 files changed, 41 insertions(+), 5 deletions(-)

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..e676356 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void 
 kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
 ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
 Why do you need hlist_del_init() here? Why not move it into

 Since the hlist entry would otherwise be deleted twice. We call it like this:

 kvm_mmu_prepare_zap_obsolete_page(page, list);
 kvm_mmu_commit_zap_page(list);
kvm_mmu_free_page(page);

 The first place is kvm_mmu_prepare_zap_obsolete_page(page), which 
 have
 deleted the hash list.

 kvm_mmu_prepare_zap_page() like we discussed it here:
 https://patchwork.kernel.org/patch/2580351/ instead of doing
 it differently for obsolete and non obsolete pages?

 It can break the hash-list walking: we would have to rescan the
 hash list once a page has been prepared-zapped.

 I mentioned it in the changelog:

   4): drop the patch which deleted page from hash list at the 
 prepare
   time since it can break the walk based on hash list.
 Can you elaborate on how this can happen?

 Here is an example:

 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
int r;

 pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 r = 0;
 spin_lock(&kvm->mmu_lock);
 for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
  sp->role.word);
 r = 1;
 kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 }
 kvm_mmu_commit_zap_page(kvm, &invalid_list);
 spin_unlock(&kvm->mmu_lock);

return r;
 }

 It works fine since kvm_mmu_prepare_zap_page does not touch the hash 
 list.
 If we delete hlist in kvm_mmu_prepare_zap_page(), this kind of codes 
 should
 be changed to:

 restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
  sp->role.word);
 r = 1;
 if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
 goto restart;
 }
 kvm_mmu_commit_zap_page(kvm, &invalid_list);

 Hmm, yes. So lets leave it as is and always commit invalid_list before

 So, you mean drop this patch and the patch of
 KVM: MMU: collapse TLB flushes when zap all pages?

 We still want to add kvm_reload_remote_mmus() to
 kvm_mmu_invalidate_zap_all_pages(). But yes, we disable a nice
 optimization here. So may be skipping obsolete pages while walking
 hashtable is better solution.

 I am willing to use this way instead, but it looks worse than this
 patch:

 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 9b57faa..810410c 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
 *kvm, int nr)
  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
  {
 ASSERT(is_empty_shadow_page(sp->spt));
 -  hlist_del(&sp->hash_link);
 +  hlist_del_init(&sp->hash_link);
 Why not drop this

 list_del(&sp->link);
 free_page((unsigned long)sp->spt);
 if (!sp->role.direct)
 @@ -1648,14 +1648,20 @@ static int kvm_mmu_prepare_zap_page(struct kvm 
 *kvm, struct kvm_mmu_page *sp,
  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);

 +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 +{
 +  return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 +}
 +
  #define for_each_gfn_sp(_kvm, _sp, _gfn)  \

Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-23 Thread Xiao Guangrong
On 05/24/2013 01:39 PM, Xiao Guangrong wrote:

 if (kvm_mmu_prepare_zap_page(sp, list))
 hlist_del(&sp->hlist);

 Or did I miss your suggestion?
 My assumption is that we can leave obsolete shadow pages on hashtable
 till commit_zap time.
 
 Ah, I see.
 
 Yes, I agree with your idea. I think we can only skip obsolete-and-invalid
 pages, since an obsolete-but-unzapped page still affects the mmu's behaviour;
 for example, it can keep a page write-protected, and kvm_mmu_unprotect_page()
 cannot work correctly if it skips unzapped obsolete pages.

Actually, kvm_mmu_unprotect_page() can still work: we can also skip obsolete
pages when detecting whether a page needs to be write-protected, and it is
then easy to make the page writable again when the obsolete pages are zapped.

I will update it following your idea; sorry for the noise.
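
A minimal sketch of that idea, purely as an illustration (the helper name
below is made up and this is not the posted patch): when deciding whether a
gfn still has to be write-protected, obsolete shadow pages can be ignored,
because they are about to be zapped and will never be used for translation
again.

static bool gfn_needs_write_protect(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_mmu_page *sp;

	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
		/* Illustration: skip pages from an old mmu generation. */
		if (is_obsolete_sp(kvm, sp))
			continue;

		/* A live shadow page still maps this gfn. */
		return true;
	}

	return false;
}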

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v7 09/11] KVM: MMU: introduce kvm_mmu_prepare_zap_obsolete_page

2013-05-22 Thread Gleb Natapov
On Thu, May 23, 2013 at 03:55:58AM +0800, Xiao Guangrong wrote:
> It is only used to zap the obsolete page. Since the obsolete page
> will not be used, we need not spend time to find its unsync children
> out. Also, we delete the page from shadow page cache so that the page
> is completely isolated after call this function.
> 
> The later patch will use it to collapse tlb flushes
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/kvm/mmu.c |   46 +-
>  1 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9b57faa..e676356 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1466,7 +1466,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm 
> *kvm, int nr)
>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  {
>   ASSERT(is_empty_shadow_page(sp->spt));
> - hlist_del(&sp->hash_link);
> + hlist_del_init(&sp->hash_link);
Why do you need hlist_del_init() here? Why not move it into
kvm_mmu_prepare_zap_page() like we discussed it here:
https://patchwork.kernel.org/patch/2580351/ instead of doing
it differently for obsolete and non obsolete pages?

>   list_del(&sp->link);
>   free_page((unsigned long)sp->spt);
>   if (!sp->role.direct)
> @@ -2069,14 +2069,19 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>   return zapped;
>  }
>  
> -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> - struct list_head *invalid_list)
> +static int
> +__kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> +bool zap_unsync_children,
> +struct list_head *invalid_list)
>  {
> - int ret;
> + int ret = 0;
>  
>   trace_kvm_mmu_prepare_zap_page(sp);
>   ++kvm->stat.mmu_shadow_zapped;
> - ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> +
> + if (likely(zap_unsync_children))
> + ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
> +
>   kvm_mmu_page_unlink_children(kvm, sp);
>   kvm_mmu_unlink_parents(kvm, sp);
>  
> @@ -2099,6 +2104,37 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, 
> struct kvm_mmu_page *sp,
>   return ret;
>  }
>  
> +/*
> + * The obsolete page will not be used, we need not spend time to find
> + * its unsync children out. Also, we delete the page from shadow page
> + * cache so that the page is completely isolated after call this
> + * function.
> + *
> + * Note: if we use this function in for_each_gfn_xxx macros, we should
> + * re-walk the list when it successfully zaps one page.
> + */
> +static int
> +kvm_mmu_prepare_zap_obsolete_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> +   struct list_head *invalid_list)
> +{
> + int ret;
> +
> + WARN_ON(!is_obsolete_sp(kvm, sp));
> +
> + ret = __kvm_mmu_prepare_zap_page(kvm, sp, false, invalid_list);
> + if (ret)
> + hlist_del_init(&sp->hash_link);
Why hlist_del() is not enough?

> +
> + WARN_ON(ret > 1);
> + return ret;
> +}
> +
> +static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> + struct list_head *invalid_list)
> +{
> + return __kvm_mmu_prepare_zap_page(kvm, sp, true, invalid_list);
> +}
> +
>  static void kvm_mmu_commit_zap_page(struct kvm *kvm,
>   struct list_head *invalid_list)
>  {
> -- 
> 1.7.7.6

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
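
For context, the "later patch" mentioned in the changelog above is meant to
zap all obsolete pages in one pass and flush the TLB only once. A minimal
sketch of such a caller, with a hypothetical helper name and a deliberately
cautious restart, assuming mmu_lock is held by the caller:

static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
	struct kvm_mmu_page *sp, *node;
	LIST_HEAD(invalid_list);

restart:
	list_for_each_entry_safe_reverse(sp, node,
					 &kvm->arch.active_mmu_pages, link) {
		if (!is_obsolete_sp(kvm, sp))
			continue;

		/*
		 * Be conservative: restart the walk whenever a page was
		 * actually zapped, in case the active list changed.
		 */
		if (kvm_mmu_prepare_zap_obsolete_page(kvm, sp, &invalid_list))
			goto restart;
	}

	/* One TLB flush for the whole batch. */
	kvm_mmu_commit_zap_page(kvm, &invalid_list);
}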

