As suggested by Andrea, pass the read/write bit of the fault error code to
gup(), and upgrade a read fault to a writable mapping if the host pte allows it.

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2215,7 +2215,7 @@ static void direct_pte_prefetch(struct k
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                       int level, gfn_t gfn, pfn_t pfn)
+                       int map_writable, int level, gfn_t gfn, pfn_t pfn)
 {
        struct kvm_shadow_walk_iterator iterator;
        struct kvm_mmu_page *sp;
@@ -2224,9 +2224,13 @@ static int __direct_map(struct kvm_vcpu 
 
        for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
                if (iterator.level == level) {
-                       mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+                       unsigned pte_access = ACC_ALL;
+
+                       if (!map_writable)
+                               pte_access &= ~ACC_WRITE_MASK;
+                       mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
                                     0, write, 1, &pt_write,
-                                    level, gfn, pfn, false, true);
+                                    level, gfn, pfn, false, map_writable);
                        direct_pte_prefetch(vcpu, iterator.sptep);
                        ++vcpu->stat.pf_fixed;
                        break;
@@ -2287,6 +2291,7 @@ static int nonpaging_map(struct kvm_vcpu
        int level;
        pfn_t pfn;
        unsigned long mmu_seq;
+       bool map_writable;
 
        level = mapping_level(vcpu, gfn);
 
@@ -2301,7 +2306,7 @@ static int nonpaging_map(struct kvm_vcpu
 
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
-       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+       pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
 
        /* mmio */
        if (is_error_pfn(pfn))
@@ -2311,7 +2316,7 @@ static int nonpaging_map(struct kvm_vcpu
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
-       r = __direct_map(vcpu, v, write, level, gfn, pfn);
+       r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2609,11 +2614,11 @@ static bool can_do_async_pf(struct kvm_v
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
-                        gva_t gva, pfn_t *pfn)
+                        gva_t gva, pfn_t *pfn, bool write, bool *writable)
 {
        bool async;
 
-       *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+       *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
 
        if (!async)
                return false; /* *pfn has correct page already */
@@ -2630,7 +2635,7 @@ static bool try_async_pf(struct kvm_vcpu
                        return true;
        }
 
-       *pfn = gfn_to_pfn(vcpu->kvm, gfn);
+       *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
        
        return false;
 }
@@ -2643,6 +2648,8 @@ static int tdp_page_fault(struct kvm_vcp
        int level;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        unsigned long mmu_seq;
+       int write = error_code & PFERR_WRITE_MASK;
+       bool map_writable;
 
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2658,7 +2665,7 @@ static int tdp_page_fault(struct kvm_vcp
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn))
+       if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
                return 0;
 
        /* mmio */
@@ -2668,7 +2675,7 @@ static int tdp_page_fault(struct kvm_vcp
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
-       r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+       r = __direct_map(vcpu, gpa, write, map_writable,
                         level, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct k
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         struct guest_walker *gw,
                         int user_fault, int write_fault, int hlevel,
-                        int *ptwrite, pfn_t pfn)
+                        int *ptwrite, pfn_t pfn, bool map_writable)
 {
        unsigned access = gw->pt_access;
        struct kvm_mmu_page *sp = NULL;
@@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
 
        mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
                     user_fault, write_fault, dirty, ptwrite, it.level,
-                    gw->gfn, pfn, false, true);
+                    gw->gfn, pfn, false, map_writable);
        FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
        return it.sptep;
@@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_
        pfn_t pfn;
        int level = PT_PAGE_TABLE_LEVEL;
        unsigned long mmu_seq;
+       bool map_writable;
 
        pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
@@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn))
+       if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
+                        &map_writable))
                return 0;
 
        /* mmio */
        if (is_error_pfn(pfn))
                return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
+       if (!map_writable)
+               walker.pte_access &= ~ACC_WRITE_MASK;
+
        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
@@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_
        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
        kvm_mmu_free_some_pages(vcpu);
        sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-                            level, &write_pt, pfn);
+                            level, &write_pt, pfn, map_writable);
        (void)sptep;
        pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
                 sptep, *sptep, write_pt);
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async);
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+                      bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+                     bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                         struct kvm_memory_slot *slot, gfn_t gfn);
 int memslot_id(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -959,7 +959,7 @@ static pfn_t get_fault_pfn(void)
 }
 
 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-                       bool *async)
+                       bool *async, bool write_fault, bool *writable)
 {
        struct page *page[1];
        int npages = 0;
@@ -968,19 +968,42 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
        /* we can do it either atomically or asynchronously, not both */
        BUG_ON(atomic && async);
 
+       BUG_ON(!write_fault && !writable);
+
+       if (writable)
+               *writable = true;
+
        if (atomic || async)
                npages = __get_user_pages_fast(addr, 1, 1, page);
 
        if (unlikely(npages != 1) && !atomic) {
                might_sleep();
 
+               if (writable)
+                       *writable = write_fault;
+
                if (async) {
                        down_read(&current->mm->mmap_sem);
                        npages = get_user_pages_noio(current, current->mm,
-                                                    addr, 1, 1, 0, page, NULL);
+                                                    addr, 1, write_fault, 0,
+                                                    page, NULL);
                        up_read(&current->mm->mmap_sem);
                } else
-                       npages = get_user_pages_fast(addr, 1, 1, page);
+                       npages = get_user_pages_fast(addr, 1, write_fault,
+                                                    page);
+
+               /* map read fault as writable if possible */
+               if (unlikely(!write_fault) && npages == 1) {
+                       struct page *wpage[1];
+
+                       npages = __get_user_pages_fast(addr, 1, 1, wpage);
+                       if (npages == 1) {
+                               *writable = true;
+                               put_page(page[0]);
+                               page[0] = wpage[0];
+                       }
+                       npages = 1;
+               }
        }
 
        if (unlikely(npages != 1)) {
@@ -1018,11 +1041,12 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-       return hva_to_pfn(kvm, addr, true, NULL);
+       return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
+                         bool write_fault, bool *writable)
 {
        unsigned long addr;
 
@@ -1035,32 +1059,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kv
                return page_to_pfn(bad_page);
        }
 
-       return hva_to_pfn(kvm, addr, atomic, async);
+       return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-       return __gfn_to_pfn(kvm, gfn, true, NULL);
+       return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+                      bool write_fault, bool *writable)
 {
-       return __gfn_to_pfn(kvm, gfn, false, async);
+       return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
-       return __gfn_to_pfn(kvm, gfn, false, NULL);
+       return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+                     bool *writable)
+{
+       return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                         struct kvm_memory_slot *slot, gfn_t gfn)
 {
        unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-       return hva_to_pfn(kvm, addr, false, NULL);
+       return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to