Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction

2011-09-14 Thread Avi Kivity

On 09/13/2011 09:24 PM, Xiao Guangrong wrote:


  +static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
  +			      unsigned long cr2, int emulation_type)
  +{
  +	if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu))
  +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

  If mmu_is_nested(), cr2 is an ngpa; we have to translate it to a gpa, no?


Yeah, will fix it.

And this bug also exists in the current code: it always uses the L2 gpa to
emulate the write operation.


Can you please send this fix separately, so it can be backported if needed?


I guess the reason it is not triggered is that the gpa of L2's shadow page cannot
be touched by L2, which means no page table is write-protected for L2.


Yes.  All real guest hypervisors will do that.  But it is technically 
possible for a hypervisor to allow its guest access to the real page tables.
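
To make that concrete, here is a minimal sketch of the unprotect path being
discussed (hypothetical code for illustration only, not the actual fix):

	gpa_t gpa = cr2;

	if (mmu_is_nested(vcpu))
		/* cr2 is an L2 gpa (ngpa); walk L1's NPT to get the L1 gpa. */
		gpa = translate_nested_gpa(vcpu, cr2, PFERR_WRITE_MASK);
	else if (!vcpu->arch.mmu.direct_map)
		/* Shadow paging: cr2 is a guest virtual address. */
		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);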



  btw, I don't see mmu.direct_map initialized for nested npt?


nested_svm_vmrun() -> nested_svm_init_mmu_context():
static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	int r;

	r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);

	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu.shadow_root_level = get_npt_level();
	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;

	return r;
}

It is initialized in kvm_init_shadow_mmu :-)


Yes, need new eyeglasses.

--
error compiling committee.c: too many arguments to function



Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction

2011-09-14 Thread Xiao Guangrong
On 09/14/2011 05:53 PM, Avi Kivity wrote:
 On 09/13/2011 09:24 PM, Xiao Guangrong wrote:
 
   +static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
   +			      unsigned long cr2, int emulation_type)
   +{
   +	if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu))
   +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
   If mmu_is_nested(), cr2 is an ngpa; we have to translate it to a gpa, no?
 

 Yeah, will fix it.

 And this bug also exists in the current code: it always uses the L2 gpa to
 emulate the write operation.
 
 Can you please send this fix separately, so it can be backported if needed?
 

Sure, I will do it as soon as possible. :-)


Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction

2011-09-14 Thread Xiao Guangrong
On 09/14/2011 06:19 PM, Xiao Guangrong wrote:
 On 09/14/2011 05:53 PM, Avi Kivity wrote:
 On 09/13/2011 09:24 PM, Xiao Guangrong wrote:

  +static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
  +			      unsigned long cr2, int emulation_type)
  +{
  +	if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu))
  +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

  If mmu_is_nested(), cr2 is an ngpa; we have to translate it to a gpa, no?


 Yeah, will fix it.

 And this bug also exists in the current code: it always uses the L2 gpa to
 emulate the write operation.

 Can you please send this fix separately, so it can be backported if needed?

 
 Sure, I will do it as soon as possible. :-)

I am so sorry, the current code is good: it already translates the L2 gpa to an
L1 gpa:

vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;

Please ignore it.
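
For reference, translate_nested_gpa looks roughly like this (lightly abridged
from the x86.c of that time):

static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
{
	gpa_t t_gpa;
	struct x86_exception exception;

	BUG_ON(!mmu_is_nested(vcpu));

	/* NPT walks are always user-walks */
	access |= PFERR_USER_MASK;

	/* Walk L1's NPT to turn the L2 gpa (ngpa) into an L1 gpa. */
	t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);

	return t_gpa;
}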


Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction

2011-09-13 Thread Xiao Guangrong
On 09/13/2011 06:47 PM, Avi Kivity wrote:
 On 08/30/2011 05:35 AM, Xiao Guangrong wrote:
 If the emulation is caused by a #PF and the instruction is not a
 page-table-writing instruction, it means the VM-EXIT was caused by shadow-page
 write protection; we can zap the shadow page and retry the instruction directly.

 The idea is from Avi


   int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
 +bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 
 Please use the usual x86_ prefix used in the emulator interface.
 

OK, will fix.

 @@ -3720,10 +3721,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
   	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
   }

 +static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
 +{
 +	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
 +		return vcpu_match_mmio_gpa(vcpu, addr);
 +
 +	return vcpu_match_mmio_gva(vcpu, addr);
 +}
 +
   int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 			 void *insn, int insn_len)
   {
 -	int r;
 +	int r, emulation_type = EMULTYPE_RETRY;
   enum emulation_result er;

   	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
 @@ -3735,7 +3744,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}

 -	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
 +	if (is_mmio_page_fault(vcpu, cr2))
 +		emulation_type = 0;
 +
 +	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);

   switch (er) {
   case EMULATE_DONE:
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 6b37f18..1afe59e 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -4814,6 +4814,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
   }

 +static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 +			      unsigned long cr2, int emulation_type)
 +{
 +	if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu))
 +		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
  If mmu_is_nested(), cr2 is an ngpa; we have to translate it to a gpa, no?
 

Yeah, will fix it.

And this bug also exists in the current code: it always uses the L2 gpa to
emulate the write operation.
I guess the reason it is not triggered is that the gpa of L2's shadow page cannot
be touched by L2, which means no page table is write-protected for L2.

 btw, I don't see mmu.direct_map initialized for nested npt?
 

nested_svm_vmrun() -> nested_svm_init_mmu_context():
static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	int r;

	r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);

	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu.shadow_root_level = get_npt_level();
	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;

	return r;
}

It is initialized in kvm_init_shadow_mmu :-)
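
For completeness, kvm_init_shadow_mmu() dispatches to the per-paging-mode
context setup, and that is where the flag gets set; a condensed sketch
(abridged, with the other hook assignments elided):

static int nonpaging_init_context(struct kvm_vcpu *vcpu,
				  struct kvm_mmu *context)
{
	/* ... other hooks elided ... */
	context->direct_map = true;	/* no guest page tables to shadow */
	return 0;
}

static int paging64_init_context_common(struct kvm_vcpu *vcpu,
					struct kvm_mmu *context, int level)
{
	/* ... other hooks elided ... */
	context->direct_map = false;	/* shadowing guest page tables */
	return 0;
}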


[PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction

2011-08-29 Thread Xiao Guangrong
If the emulation is caused by a #PF and the instruction is not a
page-table-writing instruction, it means the VM-EXIT was caused by shadow-page
write protection; we can zap the shadow page and retry the instruction directly.

The idea is from Avi

Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com
---
 arch/x86/include/asm/kvm_emulate.h |    1 +
 arch/x86/include/asm/kvm_host.h    |    5 +
 arch/x86/kvm/emulate.c             |    5 +
 arch/x86/kvm/mmu.c                 |   22 +-
 arch/x86/kvm/x86.c                 |   47 +-
 5 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 6040d11..fa87b63 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -362,6 +362,7 @@ enum x86_intercept {
 #endif
 
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ab4241..27a25df 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -443,6 +443,9 @@ struct kvm_vcpu_arch {
 
cpumask_var_t wbinvd_dirty_mask;
 
+   unsigned long last_retry_eip;
+   unsigned long last_retry_addr;
+
struct {
bool halted;
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -689,6 +692,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	(1 << 0)
 #define EMULTYPE_TRAP_UD	(1 << 1)
 #define EMULTYPE_SKIP		(1 << 2)
+#define EMULTYPE_RETRY		(1 << 3)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
 
@@ -753,6 +757,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
   const u8 *new, int bytes,
   bool guest_initiated);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e24c269..c62424e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3691,6 +3691,11 @@ done:
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
+bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+	return ctxt->d & PageTable;
+}
+
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 {
/* The second termination condition only applies for REPE
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b01afee..26aae11 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1997,7 +1997,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *sp;
struct hlist_node *node;
@@ -2007,6 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
 
+	spin_lock(&kvm->mmu_lock);
 	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
@@ -2014,8 +2015,10 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
@@ -3697,9 +3700,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3720,10 +3721,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+   return vcpu_match_mmio_gva(vcpu, addr);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
   void *insn, int insn_len)
 {
-	int r;
+	int r, emulation_type = EMULTYPE_RETRY;