Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction
On 09/13/2011 09:24 PM, Xiao Guangrong wrote: +static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + unsigned long cr2, int emulation_type) +{ +if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu)) +gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); If mmu_is_nested(), cr2 is an ngpa, we have to translate it to a gpa, no? Yeah, will fix it. And this bug also exists in the current code: it always uses L2 gpa to emulate write operation. Can you please send this fix separately, so it can be backported if needed? I guess the reason that it is not triggered is: the gpa of L2's shadow page can not be touched by L2, it means no page table is write-protected by L2. Yes. All real guest hypervisors will do that. But it is technically possible for a hypervisor to allow its guest access to the real page tables. btw, I don't see mmu.direct_map initialized for nested npt? nested_svm_vmrun() -> nested_svm_init_mmu_context(): static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { int r; r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; return r; } It is initialized in kvm_init_shadow_mmu :-) Yes, need new eyeglasses. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction
On 09/14/2011 05:53 PM, Avi Kivity wrote: On 09/13/2011 09:24 PM, Xiao Guangrong wrote: +static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + unsigned long cr2, int emulation_type) +{ +if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu)) +gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); If mmu_is_nested(), cr2 is an ngpa, we have to translate it to a gpa, no? Yeah, will fix it. And this bug also exists in the current code: it always uses L2 gpa to emulate write operation. Can you please send this fix separately, so it can be backported if needed? Sure, i will do it as soon as possible. :-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction
On 09/14/2011 06:19 PM, Xiao Guangrong wrote: On 09/14/2011 05:53 PM, Avi Kivity wrote: On 09/13/2011 09:24 PM, Xiao Guangrong wrote: +static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + unsigned long cr2, int emulation_type) +{ +if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu)) +gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); If mmu_is_nested(), cr2 is an ngpa, we have to translate it to a gpa, no? Yeah, will fix it. And this bug also exists in the current code: it always uses L2 gpa to emulate write operation. Can you please send this fix separately, so it can be backported if needed? Sure, i will do it as soon as possible. :-) I am so sorry, the current code is good, it has already translated L2 gpa to L1 gpa: vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; Please ignore it. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction
On 09/13/2011 06:47 PM, Avi Kivity wrote: On 08/30/2011 05:35 AM, Xiao Guangrong wrote: If the emulation is caused by #PF and it is non-page_table writing instruction, it means the VM-EXIT is caused by shadow page protected, we can zap the shadow page and retry this instruction directly The idea is from Avi int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); +bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt); Please use the usual x86_ prefix used in the emulator interface. OK, will fix. @@ -3720,10 +3721,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); } +static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) +{ +if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) +return vcpu_match_mmio_gpa(vcpu, addr); + +return vcpu_match_mmio_gva(vcpu, addr); +} + int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, void *insn, int insn_len) { -int r; +int r, emulation_type = EMULTYPE_RETRY; enum emulation_result er; r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); @@ -3735,7 +3744,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, goto out; } -er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); +if (is_mmio_page_fault(vcpu, cr2)) +emulation_type = 0; + +er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); switch (er) { case EMULATE_DONE: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6b37f18..1afe59e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4814,6 +4814,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) return false; } +static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + unsigned long cr2, int emulation_type) +{ +if (!vcpu->arch.mmu.direct_map && !mmu_is_nested(vcpu)) +gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); If mmu_is_nested(), cr2 is an ngpa, we have to translate it to a gpa, no? Yeah, will fix it. 
And this bug also exists in the current code: it always uses L2 gpa to emulate write operation. I guess the reason that it is not triggered is: the gpa of L2's shadow page can not be touched by L2, it means no page table is write-protected by L2. btw, I don't see mmu.direct_map initialized for nested npt? nested_svm_vmrun() -> nested_svm_init_mmu_context(): static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { int r; r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; return r; } It is initialized in kvm_init_shadow_mmu :-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 03/11] KVM: x86: retry non-page-table writing instruction
If the emulation is caused by #PF and it is non-page_table writing instruction, it means the VM-EXIT is caused by shadow page protected, we can zap the shadow page and retry this instruction directly The idea is from Avi Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com --- arch/x86/include/asm/kvm_emulate.h |1 + arch/x86/include/asm/kvm_host.h|5 arch/x86/kvm/emulate.c |5 arch/x86/kvm/mmu.c | 22 + arch/x86/kvm/x86.c | 47 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 6040d11..fa87b63 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -362,6 +362,7 @@ enum x86_intercept { #endif int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); +bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6ab4241..27a25df 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -443,6 +443,9 @@ struct kvm_vcpu_arch { cpumask_var_t wbinvd_dirty_mask; + unsigned long last_retry_eip; + unsigned long last_retry_addr; + struct { bool halted; gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; @@ -689,6 +692,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 0) #define EMULTYPE_TRAP_UD (1 1) #define EMULTYPE_SKIP (1 2) +#define EMULTYPE_RETRY (1 3) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -753,6 +757,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes, bool guest_initiated); +int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int 
kvm_mmu_load(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e24c269..c62424e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3691,6 +3691,11 @@ done: return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; } +bool page_table_writing_insn(struct x86_emulate_ctxt *ctxt) +{ + return ctxt-d PageTable; +} + static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) { /* The second termination condition only applies for REPE diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b01afee..26aae11 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1997,7 +1997,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) kvm-arch.n_max_mmu_pages = goal_nr_mmu_pages; } -static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) +int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) { struct kvm_mmu_page *sp; struct hlist_node *node; @@ -2007,6 +2007,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) pgprintk(%s: looking for gfn %llx\n, __func__, gfn); r = 0; + spin_lock(kvm-mmu_lock); for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { pgprintk(%s: gfn %llx role %x\n, __func__, gfn, sp-role.word); @@ -2014,8 +2015,10 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } kvm_mmu_commit_zap_page(kvm, invalid_list); + spin_unlock(kvm-mmu_lock); return r; } +EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) { @@ -3697,9 +3700,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); - spin_lock(vcpu-kvm-mmu_lock); r = kvm_mmu_unprotect_page(vcpu-kvm, gpa PAGE_SHIFT); - spin_unlock(vcpu-kvm-mmu_lock); return r; } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); @@ -3720,10 +3721,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) kvm_mmu_commit_zap_page(vcpu-kvm, 
invalid_list); } +static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) +{ + if (vcpu-arch.mmu.direct_map || mmu_is_nested(vcpu)) + return vcpu_match_mmio_gpa(vcpu, addr); + + return vcpu_match_mmio_gva(vcpu, addr); +} + int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, void *insn, int insn_len) { - int r; + int r, emulation_type =