On Tue, Dec 18, 2007 at 05:49:51PM +0200, Avi Kivity wrote:
> Marcelo Tosatti wrote:
> >Hi, 
> >
> >The following is an improvement on top of an earlier patch by Izik. It
> >increases pagefault scalability for SMP guests by allowing concurrent
> >guest walking, allocation and instruction emulation on the fault path.
> >
> >The test being used is pft, which starts a number of threads
> >allocating and writing malloc()'ed memory. pft.c can be found at
> >http://lkml.org/lkml/2004/8/15/58
> >
> >The script being used is:
> >
> >bytes=$((400*1024*1024))
> >./pft -t -b$bytes -r10 -f1
> >./pft -b$bytes -r10 -f2
> >./pft -b$bytes -r10 -f3
> >./pft -b$bytes -r10 -f4
> >./pft -b$bytes -r10 -f8
> >
> >This is a 4-way guest.
> >
> >One important detail from the results is that there is no difference
> >in the two-thread case, but beyond that we see a clear improvement.
> >follow_page() shows up high in profiling, so I believe this is partly
> >because we end up in follow_page() twice while holding the lock: once
> >in mmu_set_spte() from walk_addr() and again in mmu_set_spte() in
> >fetch(). I'm looking into removing those duplicated calls for the
> >same gfn.
> >
> >The patch still lacks the copy_from_user_inatomic() change in
> >prefetch_page() to avoid a potential sleep in case the page is swapped
> >out. 
> >
> >Another issue is that fetch() will now re-read the ptes after
> >instantiating a shadow page, but in theory they could be swapped out. I
> >believe that is safe, since walk_addr() has just touched the ptes and
> >brought them in from swap.
> >  
> 
> We need to convert that to kvm_read_guest_atomic() to avoid even that 
> theoretical race.  If the read fails, we can simply return and let the 
> guest retry the faulting instruction.
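
Condensed, that contract in the new fetch() looks like this (annotated
extract of the paging_tmpl.h hunk below; kvm_read_guest_inatomic() is the
non-sleeping helper added by this patch):

                if (new_page && !metaphysical) {
                        int r;
                        pt_element_t curr_pte;

                        /* Atomic copy: fails instead of sleeping if the
                         * gpte is not resident. */
                        r = kvm_read_guest_inatomic(vcpu->kvm,
                                                    walker->pte_gpa[level - 2],
                                                    &curr_pte,
                                                    sizeof(curr_pte));
                        /* On failure, or if the gpte changed under us,
                         * unwind and let the guest re-execute the
                         * faulting instruction. */
                        if (r || curr_pte != walker->ptes[level - 2]) {
                                shadow_ent = NULL;
                                goto out;
                        }
                }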

Updated patch, now feature complete. Changes from last version:

- Use __gfn_to_page() in cmpxchg_gpte() to avoid a potential deadlock
- Add kvm_read_guest_inatomic() and use it in fetch()
- Make prefetch_page() use copy_from_user_inatomic()
- Pass the grabbed page down to mmu_set_spte() to avoid a potential schedule
  with mmu_lock held (this could happen even without the page being
  swapped out, because get_user_pages() calls cond_resched()); a condensed
  sketch of the resulting lock ordering follows this list.
- Convert a few kvm->lock users missed in the last version to mmap_sem.
- Grab kvm->lock when calling kvm_iodevice_{read,write}
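
The resulting ordering on the shadow fault path, condensed from the
paging_tmpl.h page_fault() changes below (walker->page is grabbed via
get_user_pages() while only mmap_sem is held for read; nothing inside
mmu_lock may sleep):

        down_read(&current->mm->mmap_sem);
        /* May sleep: walks the guest page table and grabs walker->page
         * through get_user_pages()/cond_resched(). */
        r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
                             fetch_fault);

        spin_lock(&vcpu->kvm->mmu_lock);
        /* No sleeping from here on: fetch() uses walker->page and the
         * inatomic guest-read helper only. */
        shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
                                  &write_pt);
        spin_unlock(&vcpu->kvm->mmu_lock);

        up_read(&current->mm->mmap_sem);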

Please review.

Tests on a 4-way guest:

KVM stock:
 Gb Rep Threads   User      System     Wall flt/cpu/s fault/wsec
  0  10    1    0.368s      5.440s   6.017s176297.521 165958.112
  0  10    2    0.520s      7.144s   4.023s133603.358 241902.916
  0  10    3    0.576s     11.292s   4.061s 86277.053 221972.262
  0  10    4    0.596s     14.996s   4.058s 65670.603 223380.197
  0  10    8    0.916s     14.772s   4.063s 65268.743 220801.490


KVM + scale-2.patch:
 Gb Rep Threads   User      System     Wall flt/cpu/s fault/wsec
  0  10    1    0.296s      4.976s   6.006s194221.567 168951.621
  0  10    2    0.408s      6.208s   3.084s154766.639 266578.709
  0  10    3    0.528s      6.736s   2.093s140960.353 348877.073
  0  10    4    0.548s      7.988s   2.059s119955.022 394976.087
  0  10    8    1.596s      7.896s   3.016s107873.592 323434.429


diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 401eb7c..1b375ba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -810,7 +810,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
         * number of actived pages , we must to free some mmu pages before we
         * change the value
         */
-
+       spin_lock(&kvm->mmu_lock);
        if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
            kvm_nr_mmu_pages) {
                int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
@@ -831,6 +831,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
                                         - kvm->arch.n_alloc_mmu_pages;
 
        kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages;
+       spin_unlock(&kvm->mmu_lock);
 }
 
 static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -879,13 +880,13 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 
        if (gpa == UNMAPPED_GVA)
                return NULL;
-       return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       return __gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                         unsigned pt_access, unsigned pte_access,
                         int user_fault, int write_fault, int dirty,
-                        int *ptwrite, gfn_t gfn)
+                        int *ptwrite, gfn_t gfn, struct page *userpage)
 {
        u64 spte;
        int was_rmapped = is_rmap_pte(*shadow_pte);
@@ -907,7 +908,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
        if (!(pte_access & ACC_EXEC_MASK))
                spte |= PT64_NX_MASK;
 
-       page = gfn_to_page(vcpu->kvm, gfn);
+       if (userpage) {
+               page = userpage;
+               get_page(page);
+       } else
+               page = __gfn_to_page(vcpu->kvm, gfn);
 
        spte |= PT_PRESENT_MASK;
        if (pte_access & ACC_USER_MASK)
@@ -984,7 +989,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
                if (level == 1) {
                        mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-                                    0, write, 1, &pt_write, gfn);
+                                    0, write, 1, &pt_write, gfn, NULL);
                        return pt_write || is_io_pte(table[index]);
                }
 
@@ -1026,6 +1031,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                return;
+       spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
        if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
                hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1033,6 +1039,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                sp = page_header(root);
                --sp->root_count;
                vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+               spin_unlock(&vcpu->kvm->mmu_lock);
                return;
        }
 #endif
@@ -1047,6 +1054,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
        }
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+       spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
 static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
@@ -1129,6 +1137,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
+       context->pte_to_page = NULL;
        context->free = nonpaging_free;
        context->prefetch_page = nonpaging_prefetch_page;
        context->root_level = 0;
@@ -1177,6 +1186,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->gva_to_gpa = paging64_gva_to_gpa;
+       context->pte_to_page = paging64_pte_to_page;
        context->prefetch_page = paging64_prefetch_page;
        context->free = paging_free;
        context->root_level = level;
@@ -1197,6 +1207,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->gva_to_gpa = paging32_gva_to_gpa;
+       context->pte_to_page = paging32_pte_to_page;
        context->free = paging_free;
        context->prefetch_page = paging32_prefetch_page;
        context->root_level = PT32_ROOT_LEVEL;
@@ -1245,15 +1256,16 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
        int r;
 
-       mutex_lock(&vcpu->kvm->lock);
        r = mmu_topup_memory_caches(vcpu);
        if (r)
                goto out;
+
+       spin_lock(&vcpu->kvm->mmu_lock);
        mmu_alloc_roots(vcpu);
+       spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
        kvm_mmu_flush_tlb(vcpu);
 out:
-       mutex_unlock(&vcpu->kvm->lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1286,7 +1298,8 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
                                  struct kvm_mmu_page *sp,
                                  u64 *spte,
                                  const void *new, int bytes,
-                                 int offset_in_pte)
+                                 int offset_in_pte,
+                                 struct page *userpage)
 {
        if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
                ++vcpu->kvm->stat.mmu_pde_zapped;
@@ -1295,9 +1308,11 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 
        ++vcpu->kvm->stat.mmu_pte_updated;
        if (sp->role.glevels == PT32_ROOT_LEVEL)
-               paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+               paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte,
+                                   userpage);
        else
-               paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+               paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte,
+                                   userpage);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -1329,7 +1344,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                      const u8 *new, int bytes)
+                      const u8 *new, int bytes, struct page *userpage)
 {
        gfn_t gfn = gpa >> PAGE_SHIFT;
        struct kvm_mmu_page *sp;
@@ -1410,7 +1425,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                        entry = *spte;
                        mmu_pte_write_zap_pte(vcpu, sp, spte);
                        mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
-                                             page_offset & (pte_size - 1));
+                                             page_offset & (pte_size - 1),
+                                             userpage);
                        mmu_pte_write_flush_tlb(vcpu, entry, *spte);
                        ++spte;
                }
@@ -1420,13 +1436,22 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 {
-       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa_t gpa;
+       int r;
+
+       down_read(&current->mm->mmap_sem);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       up_read(&current->mm->mmap_sem);
 
-       return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       spin_lock(&vcpu->kvm->mmu_lock);
+       r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       return r;
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
+       spin_lock(&vcpu->kvm->mmu_lock);
        while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
                struct kvm_mmu_page *sp;
 
@@ -1435,6 +1460,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
                kvm_mmu_zap_page(vcpu->kvm, sp);
                ++vcpu->kvm->stat.mmu_recycled;
        }
+       spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -1442,7 +1468,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
        int r;
        enum emulation_result er;
 
-       mutex_lock(&vcpu->kvm->lock);
        r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
        if (r < 0)
                goto out;
@@ -1457,7 +1482,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
                goto out;
 
        er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
-       mutex_unlock(&vcpu->kvm->lock);
 
        switch (er) {
        case EMULATE_DONE:
@@ -1472,7 +1496,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
                BUG();
        }
 out:
-       mutex_unlock(&vcpu->kvm->lock);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -1569,8 +1592,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
 
+       spin_lock(&kvm->mmu_lock);
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
                kvm_mmu_zap_page(kvm, sp);
+       spin_unlock(&kvm->mmu_lock);
 
        kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56b88f7..b02be2e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -68,6 +68,7 @@ struct guest_walker {
        pt_element_t ptes[PT_MAX_FULL_LEVELS];
        gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
        unsigned pt_access;
+       struct page *page;
        unsigned pte_access;
        gfn_t gfn;
        u32 error_code;
@@ -91,7 +92,7 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
        pt_element_t *table;
        struct page *page;
 
-       page = gfn_to_page(kvm, table_gfn);
+       page = __gfn_to_page(kvm, table_gfn);
        table = kmap_atomic(page, KM_USER0);
 
        ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -186,6 +187,7 @@ walk:
 
                if (walker->level == PT_PAGE_TABLE_LEVEL) {
                        walker->gfn = gpte_to_gfn(pte);
+                       walker->page = __gfn_to_page(vcpu->kvm, walker->gfn);
                        break;
                }
 
@@ -196,6 +198,7 @@ walk:
                        walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
                        if (PTTYPE == 32 && is_cpuid_PSE36())
                                walker->gfn += pse36_gfn_delta(pte);
+                       walker->page = __gfn_to_page(vcpu->kvm, walker->gfn);
                        break;
                }
 
@@ -209,10 +212,15 @@ walk:
                mark_page_dirty(vcpu->kvm, table_gfn);
                ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
                            pte|PT_DIRTY_MASK);
-               if (ret)
+               if (ret) { 
+                       kvm_release_page_clean(walker->page);
                        goto walk;
+               }
                pte |= PT_DIRTY_MASK;
-               kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+               spin_lock(&vcpu->kvm->mmu_lock);
+               kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte),
+                                 walker->page);
+               spin_unlock(&vcpu->kvm->mmu_lock);
                walker->ptes[walker->level - 1] = pte;
        }
 
@@ -241,7 +249,7 @@ err:
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
                              u64 *spte, const void *pte, int bytes,
-                             int offset_in_pte)
+                             int offset_in_pte, struct page *userpage)
 {
        pt_element_t gpte;
        unsigned pte_access;
@@ -257,7 +265,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
        pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
        pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
        mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-                    gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte));
+                    gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), userpage);
 }
 
 /*
@@ -316,11 +324,16 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                                               metaphysical, access,
                                               shadow_ent, &new_page);
                if (new_page && !metaphysical) {
+                       int r;
                        pt_element_t curr_pte;
-                       kvm_read_guest(vcpu->kvm, walker->pte_gpa[level - 2],
-                                      &curr_pte, sizeof(curr_pte));
-                       if (curr_pte != walker->ptes[level - 2])
-                               return NULL;
+                       r = kvm_read_guest_inatomic(vcpu->kvm,
+                                                   walker->pte_gpa[level - 2],
+                                                   &curr_pte,
+                                                   sizeof(curr_pte));
+                       if (r || curr_pte != walker->ptes[level - 2]) {
+                               shadow_ent = NULL;
+                               goto out;
+                       }
                }
                shadow_addr = __pa(shadow_page->spt);
                shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
@@ -331,8 +344,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
        mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
                     user_fault, write_fault,
                     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-                    ptwrite, walker->gfn);
-
+                    ptwrite, walker->gfn, walker->page);
+out:
+       kvm_release_page_clean(walker->page);
        return shadow_ent;
 }
 
@@ -371,6 +385,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
        /*
         * Look up the shadow pte for the faulting address.
         */
+       down_read(&current->mm->mmap_sem);
        r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
                             fetch_fault);
 
@@ -378,12 +393,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
         * The page is not mapped by the guest.  Let the guest handle it.
         */
        if (!r) {
+               up_read(&current->mm->mmap_sem);
                pgprintk("%s: guest page fault\n", __FUNCTION__);
                inject_page_fault(vcpu, addr, walker.error_code);
                vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
                return 0;
        }
-
+       spin_lock(&vcpu->kvm->mmu_lock);
        shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
                                  &write_pt);
        pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -395,15 +411,32 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
        /*
         * mmio: emulate if accessible, otherwise its a guest fault.
         */
-       if (shadow_pte && is_io_pte(*shadow_pte))
+       if (shadow_pte && is_io_pte(*shadow_pte)) {
+               spin_unlock(&vcpu->kvm->mmu_lock);
+               up_read(&current->mm->mmap_sem);
                return 1;
+       }
 
        ++vcpu->stat.pf_fixed;
        kvm_mmu_audit(vcpu, "post page fault (fixed)");
-
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       up_read(&current->mm->mmap_sem);
        return write_pt;
 }
 
+static struct page *FNAME(pte_to_page)(struct kvm_vcpu *vcpu, const void *pte,
+                                      int bytes)
+{
+       pt_element_t gpte = *(const pt_element_t *)pte;
+
+       if (bytes < sizeof(pt_element_t))
+               return NULL;
+       if (!is_present_pte(gpte))
+               return NULL;
+
+       return __gfn_to_page(vcpu->kvm, gpte_to_gfn(gpte));
+}
+
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
        struct guest_walker walker;
@@ -415,6 +448,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
        if (r) {
                gpa = gfn_to_gpa(walker.gfn);
                gpa |= vaddr & ~PAGE_MASK;
+               kvm_release_page_clean(walker.page);
        }
 
        return gpa;
@@ -423,27 +457,36 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
                                 struct kvm_mmu_page *sp)
 {
-       int i, offset = 0;
+       int i, r, offset = 0;
        pt_element_t *gpt;
-       struct page *page;
-
+       void __user *src = (void __user *)gfn_to_hva(vcpu->kvm, sp->gfn);
+       void *dest = (void *)vcpu->kvm->prefetch_tmp_area;
+       
        if (sp->role.metaphysical
            || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
                nonpaging_prefetch_page(vcpu, sp);
                return;
        }
 
+       pagefault_disable();
+       r = __copy_from_user_inatomic(dest, src, PAGE_SIZE);
+       pagefault_enable();
+
+       if (r) {
+               nonpaging_prefetch_page(vcpu, sp);
+               return;
+       }       
+
+       gpt = (pt_element_t *)dest;
+
        if (PTTYPE == 32)
                offset = sp->role.quadrant << PT64_LEVEL_BITS;
-       page = gfn_to_page(vcpu->kvm, sp->gfn);
-       gpt = kmap_atomic(page, KM_USER0);
+
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                if (is_present_pte(gpt[offset + i]))
                        sp->spt[i] = shadow_trap_nonpresent_pte;
                else
                        sp->spt[i] = shadow_notrap_nonpresent_pte;
-       kunmap_atomic(gpt, KM_USER0);
-       kvm_release_page_clean(page);
 }
 
 #undef pt_element_t
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 20c0f5e..e5a40dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1431,27 +1431,34 @@ static int init_rmode_tss(struct kvm *kvm)
 {
        gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
        u16 data = 0;
+       int ret = 0;
        int r;
 
+       down_read(&current->mm->mmap_sem);
        r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-       if (r < 0)
-               return 0;
+       if (r < 0) 
+               goto out;
        data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
        r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16));
        if (r < 0)
-               return 0;
+               goto out;
        r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
        if (r < 0)
-               return 0;
+               goto out;
        r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
        if (r < 0)
-               return 0;
+               goto out;
        data = ~0;
-       r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
-                       sizeof(u8));
+       r = kvm_write_guest_page(kvm, fn, &data,
+                                RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
+                                sizeof(u8));
        if (r < 0)
-               return 0;
-       return 1;
+               goto out;
+
+       ret = 1;
+out:
+       up_read(&current->mm->mmap_sem);
+       return ret;
 }
 
 static void seg_setup(int seg)
@@ -1468,8 +1475,8 @@ static int alloc_apic_access_page(struct kvm *kvm)
 {
        struct kvm_userspace_memory_region kvm_userspace_mem;
        int r = 0;
-
-       mutex_lock(&kvm->lock);
+       
+       down_write(&current->mm->mmap_sem);
        if (kvm->arch.apic_access_page)
                goto out;
        kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1481,7 +1488,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
                goto out;
        kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
-       mutex_unlock(&kvm->lock);
+       up_write(&current->mm->mmap_sem);
        return r;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b26270..24d8344 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -180,7 +180,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
        int ret;
        u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
-       mutex_lock(&vcpu->kvm->lock);
+       down_read(&current->mm->mmap_sem);
        ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
                                  offset * sizeof(u64), sizeof(pdpte));
        if (ret < 0) {
@@ -197,7 +197,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
        memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
-       mutex_unlock(&vcpu->kvm->lock);
+       up_read(&current->mm->mmap_sem);
 
        return ret;
 }
@@ -211,13 +211,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
        if (is_long_mode(vcpu) || !is_pae(vcpu))
                return false;
 
-       mutex_lock(&vcpu->kvm->lock);
+       down_read(&current->mm->mmap_sem);
        r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
        if (r < 0)
                goto out;
        changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
 out:
-       mutex_unlock(&vcpu->kvm->lock);
+       up_read(&current->mm->mmap_sem);
 
        return changed;
 }
@@ -277,9 +277,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        kvm_x86_ops->set_cr0(vcpu, cr0);
        vcpu->arch.cr0 = cr0;
 
-       mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
-       mutex_unlock(&vcpu->kvm->lock);
        return;
 }
 EXPORT_SYMBOL_GPL(set_cr0);
@@ -319,9 +317,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        }
        kvm_x86_ops->set_cr4(vcpu, cr4);
        vcpu->arch.cr4 = cr4;
-       mutex_lock(&vcpu->kvm->lock);
        kvm_mmu_reset_context(vcpu);
-       mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr4);
 
@@ -359,7 +355,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                 */
        }
 
-       mutex_lock(&vcpu->kvm->lock);
+       down_read(&current->mm->mmap_sem);
        /*
         * Does the new cr3 value map to physical memory? (Note, we
         * catch an invalid cr3 even in real-mode, because it would
@@ -375,7 +371,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                vcpu->arch.cr3 = cr3;
                vcpu->arch.mmu.new_cr3(vcpu);
        }
-       mutex_unlock(&vcpu->kvm->lock);
+       up_read(&current->mm->mmap_sem);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -1170,12 +1166,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                return -EINVAL;
 
-       mutex_lock(&kvm->lock);
+       down_write(&current->mm->mmap_sem);
 
        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
        kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-       mutex_unlock(&kvm->lock);
+       up_write(&current->mm->mmap_sem);
        return 0;
 }
 
@@ -1224,7 +1220,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
            < alias->target_phys_addr)
                goto out;
 
-       mutex_lock(&kvm->lock);
+       down_write(&current->mm->mmap_sem);
 
        p = &kvm->arch.aliases[alias->slot];
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1238,7 +1234,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
        kvm_mmu_zap_all(kvm);
 
-       mutex_unlock(&kvm->lock);
+       up_write(&current->mm->mmap_sem);
 
        return 0;
 
@@ -1314,7 +1310,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;
 
-       mutex_lock(&kvm->lock);
+       down_write(&current->mm->mmap_sem);
 
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
@@ -1330,7 +1326,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        }
        r = 0;
 out:
-       mutex_unlock(&kvm->lock);
+       up_write(&current->mm->mmap_sem);
        return r;
 }
 
@@ -1524,25 +1520,32 @@ int emulator_read_std(unsigned long addr,
                             struct kvm_vcpu *vcpu)
 {
        void *data = val;
+       int r = X86EMUL_CONTINUE;
 
+       down_read(&current->mm->mmap_sem);
        while (bytes) {
                gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
 
-               if (gpa == UNMAPPED_GVA)
-                       return X86EMUL_PROPAGATE_FAULT;
+               if (gpa == UNMAPPED_GVA) { 
+                       r = X86EMUL_PROPAGATE_FAULT;
+                       goto out;
+               }
                ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
-               if (ret < 0)
-                       return X86EMUL_UNHANDLEABLE;
+               if (ret < 0) {
+                       r = X86EMUL_UNHANDLEABLE;
+                       goto out;
+               }
 
                bytes -= tocopy;
                data += tocopy;
                addr += tocopy;
        }
-
-       return X86EMUL_CONTINUE;
+out:
+       up_read(&current->mm->mmap_sem);
+       return r;
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
 
@@ -1560,7 +1563,9 @@ static int emulator_read_emulated(unsigned long addr,
                return X86EMUL_CONTINUE;
        }
 
+       down_read(&current->mm->mmap_sem);
        gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       up_read(&current->mm->mmap_sem);
 
        /* For APIC access vmexit */
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1576,11 +1581,14 @@ mmio:
        /*
         * Is this MMIO handled locally?
         */
+       mutex_lock(&vcpu->kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
        if (mmio_dev) {
                kvm_iodevice_read(mmio_dev, gpa, bytes, val);
+               mutex_unlock(&vcpu->kvm->lock);
                return X86EMUL_CONTINUE;
        }
+       mutex_unlock(&vcpu->kvm->lock);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -1594,11 +1602,21 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                               const void *val, int bytes)
 {
        int ret;
+       struct page *page;
 
+       down_read(&current->mm->mmap_sem);
        ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
-       if (ret < 0)
+       if (ret < 0) {
+               up_read(&current->mm->mmap_sem);
                return 0;
-       kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+       }
+       page = vcpu->arch.mmu.pte_to_page(vcpu, val, bytes);
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_pte_write(vcpu, gpa, val, bytes, page);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       up_read(&current->mm->mmap_sem);
+       if (page)
+               kvm_release_page_clean(page);
        return 1;
 }
 
@@ -1608,7 +1626,11 @@ static int emulator_write_emulated_onepage(unsigned long addr,
                                           struct kvm_vcpu *vcpu)
 {
        struct kvm_io_device *mmio_dev;
-       gpa_t                 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       gpa_t                 gpa;
+
+       down_read(&current->mm->mmap_sem);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       up_read(&current->mm->mmap_sem);
 
        if (gpa == UNMAPPED_GVA) {
                kvm_inject_page_fault(vcpu, addr, 2);
@@ -1626,11 +1648,14 @@ mmio:
        /*
         * Is this MMIO handled locally?
         */
+       mutex_lock(&vcpu->kvm->lock);
        mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
        if (mmio_dev) {
                kvm_iodevice_write(mmio_dev, gpa, bytes, val);
+               mutex_unlock(&vcpu->kvm->lock);
                return X86EMUL_CONTINUE;
        }
+       mutex_unlock(&vcpu->kvm->lock);
 
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
@@ -1677,11 +1702,15 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 #ifndef CONFIG_X86_64
        /* guests cmpxchg8b have to be emulated atomically */
        if (bytes == 8) {
-               gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               gpa_t gpa;
                struct page *page;
                char *addr;
                u64 *val;
 
+               down_read(&current->mm->mmap_sem);
+               gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+               up_read(&current->mm->mmap_sem);
+
                if (gpa == UNMAPPED_GVA ||
                   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
                        goto emul_write;
@@ -2077,10 +2106,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                kvm_x86_ops->skip_emulated_instruction(vcpu);
 
        for (i = 0; i < nr_pages; ++i) {
-               mutex_lock(&vcpu->kvm->lock);
+               down_read(&current->mm->mmap_sem);
                page = gva_to_page(vcpu, address + i * PAGE_SIZE);
                vcpu->arch.pio.guest_pages[i] = page;
-               mutex_unlock(&vcpu->kvm->lock);
+               up_read(&current->mm->mmap_sem);
                if (!page) {
                        kvm_inject_gp(vcpu, 0);
                        free_pio_guest_pages(vcpu);
@@ -2203,7 +2232,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
        char instruction[3];
        int ret = 0;
 
-       mutex_lock(&vcpu->kvm->lock);
 
        /*
         * Blow out the MMU to ensure that no other VCPU has an active mapping
@@ -2218,8 +2246,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
            != X86EMUL_CONTINUE)
                ret = -EFAULT;
 
-       mutex_unlock(&vcpu->kvm->lock);
-
        return ret;
 }
 
@@ -2827,13 +2853,13 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
        gpa_t gpa;
 
        vcpu_load(vcpu);
-       mutex_lock(&vcpu->kvm->lock);
+       down_read(&current->mm->mmap_sem);
        gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
+       up_read(&current->mm->mmap_sem);
        tr->physical_address = gpa;
        tr->valid = gpa != UNMAPPED_GVA;
        tr->writeable = 1;
        tr->usermode = 0;
-       mutex_unlock(&vcpu->kvm->lock);
        vcpu_put(vcpu);
 
        return 0;
@@ -3102,13 +3128,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
         */
        if (!user_alloc) {
                if (npages && !old.rmap) {
-                       down_write(&current->mm->mmap_sem);
                        memslot->userspace_addr = do_mmap(NULL, 0,
                                                     npages * PAGE_SIZE,
                                                     PROT_READ | PROT_WRITE,
                                                     MAP_SHARED | MAP_ANONYMOUS,
                                                     0);
-                       up_write(&current->mm->mmap_sem);
 
                        if (IS_ERR((void *)memslot->userspace_addr))
                                return PTR_ERR((void *)memslot->userspace_addr);
@@ -3116,10 +3140,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                        if (!old.user_alloc && old.rmap) {
                                int ret;
 
-                               down_write(&current->mm->mmap_sem);
                                ret = do_munmap(current->mm, old.userspace_addr,
                                                old.npages * PAGE_SIZE);
-                               up_write(&current->mm->mmap_sem);
                                if (ret < 0)
                                        printk(KERN_WARNING
                                       "kvm_vm_ioctl_set_memory_region: "
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 28940e1..fd06723 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -177,6 +177,8 @@ struct kvm_mmu {
        int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
        void (*free)(struct kvm_vcpu *vcpu);
        gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+       struct page *(*pte_to_page)(struct kvm_vcpu *vcpu, const void *pte,
+                                   int bytes);
        void (*prefetch_page)(struct kvm_vcpu *vcpu,
                              struct kvm_mmu_page *page);
        hpa_t root_hpa;
@@ -468,7 +470,7 @@ unsigned long segment_base(u16 selector);
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                      const u8 *new, int bytes);
+                      const u8 *new, int bytes, struct page *userpage);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 953b50a..6ca0bdb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -105,11 +105,13 @@ struct kvm_memory_slot {
 
 struct kvm {
        struct mutex lock; /* protects everything except vcpus */
+       spinlock_t mmu_lock;
        struct mm_struct *mm; /* userspace tied to this vm */
        int nmemslots;
        struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
                                        KVM_PRIVATE_MEM_SLOTS];
        struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       unsigned long prefetch_tmp_area;
        struct list_head vm_list;
        struct file *filp;
        struct kvm_io_bus mmio_bus;
@@ -163,11 +165,18 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                                int user_alloc);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
                        int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
+
+int kvm_read_guest_page_inatomic(struct kvm *kvm, gfn_t gfn, void *data, 
+                                int offset, int len);
+int kvm_read_guest_inatomic(struct kvm *kvm, gpa_t gpa, void *data,
+                           unsigned long len);
 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
                         int offset, int len);
 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 845beb2..afdb767 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,12 +165,14 @@ static struct kvm *kvm_create_vm(void)
 
        kvm->mm = current->mm;
        atomic_inc(&kvm->mm->mm_count);
+       spin_lock_init(&kvm->mmu_lock);
        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);
        kvm_io_bus_init(&kvm->mmio_bus);
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
+       kvm->prefetch_tmp_area = get_zeroed_page(GFP_KERNEL);
 out:
        return kvm;
 }
@@ -211,6 +213,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        kvm_io_bus_destroy(&kvm->mmio_bus);
        kvm_arch_destroy_vm(kvm);
        mmdrop(mm);
+       free_page(kvm->prefetch_tmp_area);
 }
 
 static int kvm_vm_release(struct inode *inode, struct file *filp)
@@ -227,7 +230,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  *
- * Must be called holding kvm->lock.
+ * Must be called holding mmap_sem for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
                            struct kvm_userspace_memory_region *mem,
@@ -338,9 +341,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
        int r;
 
-       mutex_lock(&kvm->lock);
+       down_write(&current->mm->mmap_sem);
        r = __kvm_set_memory_region(kvm, mem, user_alloc);
-       mutex_unlock(&kvm->lock);
+       up_write(&current->mm->mmap_sem);
        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -442,7 +445,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_memory_slot *slot;
 
@@ -452,11 +455,12 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
                return bad_hva();
        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
+EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
  * Requires current->mm->mmap_sem to be held
  */
-static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
+struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
        struct page *page[1];
        unsigned long addr;
@@ -480,6 +484,7 @@ static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
        return page[0];
 }
+EXPORT_SYMBOL(__gfn_to_page);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
@@ -552,6 +557,46 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest);
 
+int kvm_read_guest_page_inatomic(struct kvm *kvm, gfn_t gfn, void *data,
+                                int offset, int len)
+{
+       int r;
+       unsigned long addr;
+
+       addr = gfn_to_hva(kvm, gfn);
+       if (kvm_is_error_hva(addr))
+               return -EFAULT;
+       pagefault_disable();
+       r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+       pagefault_enable();
+       if (r)
+               return -EFAULT;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_page_inatomic);
+
+int kvm_read_guest_inatomic(struct kvm *kvm, gpa_t gpa, void *data,
+                           unsigned long len)
+{
+       gfn_t gfn = gpa >> PAGE_SHIFT;
+       int seg;
+       int offset = offset_in_page(gpa);
+       int ret;
+
+       while ((seg = next_segment(len, offset)) != 0) {
+               ret = kvm_read_guest_page_inatomic(kvm, gfn, data, offset, seg);
+               if (ret < 0)
+                       return ret;
+               offset = 0;
+               len -= seg;
+               data += seg;
+               ++gfn;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_inatomic);
+
+
 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
                         int offset, int len)
 {
