'walk_addr' runs outside of mmu_lock's protection, so by the time we handle
'fetch', the guest's mapping may already have been modified by another vcpu's
write path, such as invlpg, pte_write, or another fetch path.

Fix this by checking the guest mapping at every level.

Signed-off-by: Xiao Guangrong <[email protected]>
---
 arch/x86/kvm/paging_tmpl.h |   73 ++++++++++++++++++++++++++------------------
 1 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 19f0077..f58a5c4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -300,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         int *ptwrite, pfn_t pfn)
 {
        unsigned access = gw->pt_access;
-       struct kvm_mmu_page *sp;
+       struct kvm_mmu_page *sp = NULL;
        u64 spte, *sptep = NULL;
        int direct;
        gfn_t table_gfn;
@@ -319,22 +319,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                direct_access &= ~ACC_WRITE_MASK;
 
        for_each_shadow_entry(vcpu, addr, iterator) {
+               bool nonpresent = false, last_mapping = false;
+
                level = iterator.level;
                sptep = iterator.sptep;
-               if (iterator.level == hlevel) {
-                       mmu_set_spte(vcpu, sptep, access,
-                                    gw->pte_access & access,
-                                    user_fault, write_fault,
-                                    dirty, ptwrite, level,
-                                    gw->gfn, pfn, false, true);
-                       break;
+
+               if (level == hlevel) {
+                       last_mapping = true;
+                       goto check_set_spte;
                }
 
-               if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
-                       struct kvm_mmu_page *child;
+               if (is_large_pte(*sptep)) {
+                       drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
+                       kvm_flush_remote_tlbs(vcpu->kvm);
+               }
 
-                       if (level != gw->level)
-                               continue;
+               if (is_shadow_present_pte(*sptep) && level == gw->level) {
+                       struct kvm_mmu_page *child;
 
                        /*
                         * For the direct sp, if the guest pte's dirty bit
@@ -344,19 +345,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         * a new sp with the correct access.
                         */
                        child = page_header(*sptep & PT64_BASE_ADDR_MASK);
-                       if (child->role.access == direct_access)
-                               continue;
-
-                       mmu_page_remove_parent_pte(child, sptep);
-                       __set_spte(sptep, shadow_trap_nonpresent_pte);
-                       kvm_flush_remote_tlbs(vcpu->kvm);
+                       if (child->role.access != direct_access) {
+                               mmu_page_remove_parent_pte(child, sptep);
+                               __set_spte(sptep, shadow_trap_nonpresent_pte);
+                               kvm_flush_remote_tlbs(vcpu->kvm);
+                       }
                }
 
-               if (is_large_pte(*sptep)) {
-                       drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
-                       kvm_flush_remote_tlbs(vcpu->kvm);
-               }
+               if (is_shadow_present_pte(*sptep))
+                       goto check_set_spte;
 
+               nonpresent = true;
                if (level <= gw->level) {
                        direct = 1;
                        access = direct_access;
@@ -374,22 +373,36 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                }
                sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
                                               direct, access, sptep);
-               if (!direct) {
+check_set_spte:
+               if (level >= gw->level) {
                        r = kvm_read_guest_atomic(vcpu->kvm,
-                                                 gw->pte_gpa[level - 2],
+                                                 gw->pte_gpa[level - 1],
                                                  &curr_pte, sizeof(curr_pte));
-                       if (r || curr_pte != gw->ptes[level - 2]) {
-                               kvm_mmu_put_page(sp, sptep);
+                       if (r || curr_pte != gw->ptes[level - 1]) {
+                               if (nonpresent)
+                                       kvm_mmu_put_page(sp, sptep);
                                kvm_release_pfn_clean(pfn);
                                sptep = NULL;
                                break;
                        }
                }
 
-               spte = __pa(sp->spt)
-                       | PT_PRESENT_MASK | PT_ACCESSED_MASK
-                       | PT_WRITABLE_MASK | PT_USER_MASK;
-               *sptep = spte;
+               if (nonpresent) {
+                       spte = __pa(sp->spt)
+                               | PT_PRESENT_MASK | PT_ACCESSED_MASK
+                               | PT_WRITABLE_MASK | PT_USER_MASK;
+                       *sptep = spte;
+                       continue;
+               }
+
+               if (last_mapping) {
+                       mmu_set_spte(vcpu, sptep, access,
+                                    gw->pte_access & access,
+                                    user_fault, write_fault,
+                                    dirty, ptwrite, level,
+                                    gw->gfn, pfn, false, true);
+                       break;
+               }
        }
 
        return sptep;
-- 
1.6.1.2


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to