Currently, kvm_mmu_prepare_zap_page() cannot isolate an mmu page, i.e.
delete its hash_link node by hlist_del(), because the function may be
called while the hash list is being traversed; hlist_for_each_entry_safe()
does not solve the problem either, because zapping can happen recursively
and invalidate the cached next pointer.
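
To see why the "safe" variant is not enough, consider this minimal
sketch (the bucket and invalid_list variables here are illustrative,
not the actual KVM code); the walker caches exactly one next pointer,
which a recursive zap can free under it:

        struct kvm_mmu_page *sp;
        struct hlist_node *pos, *n;

        hlist_for_each_entry_safe(sp, pos, n, bucket, hash_link) {
                /*
                 * May recursively zap other pages, possibly the one
                 * that 'n' was cached from; the next iteration would
                 * then follow a freed pointer.
                 */
                kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, NULL);
        }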

Since the isolation must be done before releasing mmu_lock, we are now
forced to call kvm_mmu_isolate_page() for each mmu page found in
invalid_list, inside kvm_mmu_commit_zap_page().
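
For reference, a rough sketch of what the commit side does today (a
simplified version; details such as the invalid/root_count assertions
are omitted):

        static void kvm_mmu_commit_zap_page(struct kvm *kvm,
                                            struct list_head *invalid_list)
        {
                struct kvm_mmu_page *sp;

                if (list_empty(invalid_list))
                        return;

                kvm_flush_remote_tlbs(kvm);

                do {
                        sp = list_first_entry(invalid_list,
                                              struct kvm_mmu_page, link);
                        /* the isolation we would rather do at zap time */
                        kvm_mmu_isolate_page(kvm, sp);
                        kvm_mmu_free_page(sp);
                } while (!list_empty(invalid_list));
        }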

This patch adds a new parameter to kvm_mmu_prepare_zap_page() in
preparation for solving this issue; for now, all callers just pass NULL.

Note: introducing the sp_next_pos abstraction, rather than passing a
bare hlist_node pointer, makes it possible to support the other list
later as well.
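
A hedged sketch of the intended use once a caller passes a non-NULL
npos (no such caller exists yet; the bucket variable is illustrative):
the zap path can advance npos.hn whenever it deletes the node that
npos.hn points to, so the walk below stays valid across deletions:

        struct sp_next_pos npos;
        struct hlist_node *node;
        struct kvm_mmu_page *sp;

        for (node = bucket->first; node; node = npos.hn) {
                sp = hlist_entry(node, struct kvm_mmu_page, hash_link);
                npos.hn = node->next;
                /* may hlist_del() nodes; expected to fix up npos.hn */
                kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &npos);
        }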

Signed-off-by: Takuya Yoshikawa <[email protected]>
---
 arch/x86/kvm/mmu.c |   41 +++++++++++++++++++++++++++--------------
 1 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46b1435..2a48533 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1653,8 +1653,18 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
        --kvm->stat.mmu_unsync;
 }
 
+/*
+ * Used to hold a pointer to the next mmu page's node when traversing through
+ * one of the linked lists.  This must be updated correctly when deleting any
+ * entries from the list.
+ */
+struct sp_next_pos {
+       struct hlist_node *hn;  /* next hash_link node */
+};
+
 static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-                                   struct list_head *invalid_list);
+                                   struct list_head *invalid_list,
+                                   struct sp_next_pos *npos);
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
                                    struct list_head *invalid_list);
 
@@ -1672,7 +1682,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                           struct list_head *invalid_list, bool clear_unsync)
 {
        if (sp->role.cr4_pae != !!is_pae(vcpu)) {
-               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
+               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list, NULL);
                return 1;
        }
 
@@ -1680,7 +1690,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                kvm_unlink_unsync_page(vcpu->kvm, sp);
 
        if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
-               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
+               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list, NULL);
                return 1;
        }
 
@@ -1730,7 +1740,8 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
                kvm_unlink_unsync_page(vcpu->kvm, s);
                if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
                        (vcpu->arch.mmu.sync_page(vcpu, s))) {
-                       kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
+                       kvm_mmu_prepare_zap_page(vcpu->kvm, s,
+                                                &invalid_list, NULL);
                        continue;
                }
                flush = true;
@@ -2062,7 +2073,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
                struct kvm_mmu_page *sp;
 
                for_each_sp(pages, sp, parents, i) {
-                       kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+                       kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, NULL);
                        mmu_pages_clear_parents(&parents);
                        zapped++;
                }
@@ -2073,7 +2084,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 }
 
 static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-                                   struct list_head *invalid_list)
+                                   struct list_head *invalid_list,
+                                   struct sp_next_pos *npos)
 {
        int ret;
 
@@ -2149,7 +2161,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 
                        page = container_of(kvm->arch.active_mmu_pages.prev,
                                            struct kvm_mmu_page, link);
-                       kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
+                       kvm_mmu_prepare_zap_page(kvm, page, &invalid_list, NULL);
                }
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
                goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
@@ -2174,7 +2186,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
                pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
                         sp->role.word);
                r = 1;
-               kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+               kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, NULL);
        }
        kvm_mmu_commit_zap_page(kvm, &invalid_list);
        spin_unlock(&kvm->mmu_lock);
@@ -2894,7 +2906,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                sp = page_header(root);
                --sp->root_count;
                if (!sp->root_count && sp->role.invalid) {
-                       kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+                       kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+                                                &invalid_list, NULL);
                        kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
                }
                vcpu->arch.mmu.root_hpa = INVALID_PAGE;
@@ -2910,7 +2923,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                        --sp->root_count;
                        if (!sp->root_count && sp->role.invalid)
                                kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
-                                                        &invalid_list);
+                                                        &invalid_list, NULL);
                }
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
        }
@@ -3987,7 +4000,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                if (detect_write_misaligned(sp, gpa, bytes) ||
                      detect_write_flooding(sp)) {
                        zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
-                                                    &invalid_list);
+                                                       &invalid_list, NULL);
                        ++vcpu->kvm->stat.mmu_flooded;
                        continue;
                }
@@ -4041,7 +4054,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 
                sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
                                  struct kvm_mmu_page, link);
-               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+               kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list, NULL);
                ++vcpu->kvm->stat.mmu_recycled;
        }
        kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -4203,7 +4216,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
        spin_lock(&kvm->mmu_lock);
 restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
-               if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+               if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, NULL))
                        goto restart;
 
        kvm_mmu_commit_zap_page(kvm, &invalid_list);
@@ -4220,7 +4233,7 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
 
        page = container_of(kvm->arch.active_mmu_pages.prev,
                            struct kvm_mmu_page, link);
-       kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
+       kvm_mmu_prepare_zap_page(kvm, page, invalid_list, NULL);
 }
 
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
-- 
1.7.5.4
