The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.50.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.47.6
------>
commit 083ba2f40d2c83f66e320943cd08784751d32e1c
Author: Paolo Bonzini <pbonz...@redhat.com>
Date:   Wed May 16 12:50:46 2018 +0300

    KVM: nVMX: fix EPT permissions as reported in exit qualification
    
    This fixes the new ept_access_test_read_only and ept_access_test_read_write
    testcases from vmx.flat.
    
    The problem is that gpte_access moves bits around to switch from EPT
    bit order (XWR) to ACC_*_MASK bit order (RWX).  This results in an
    incorrect exit qualification.  To fix this, make pt_access and
    pte_access operate on raw PTE values (only with NX flipped to mean
    "can execute") and call gpte_access at the end of the walk.  This
    lets us use pte_access to compute the exit qualification with XWR
    bit order.
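    
    As an illustration (not part of the patch): a minimal user-space sketch
    of the bit-order mismatch.  The constants and the helper acc_from_xwr()
    below are illustrative stand-ins for KVM's VMX_EPT_*_MASK / ACC_*_MASK
    definitions and FNAME(gpte_access); EPT bit positions as in the SDM.
    
        /* XWR order used by EPT PTEs and the exit qualification. */
        #include <stdint.h>
        #include <stdio.h>
    
        #define EPT_R  (1u << 0)
        #define EPT_W  (1u << 1)
        #define EPT_X  (1u << 2)
    
        /* RWX order used by KVM's ACC_*_MASK flags. */
        #define ACC_EXEC_MASK  (1u << 0)
        #define ACC_WRITE_MASK (1u << 1)
        #define ACC_USER_MASK  (1u << 2)
    
        /* What gpte_access() conceptually does for EPT: reorder XWR -> RWX. */
        static unsigned acc_from_xwr(uint64_t pte)
        {
                return ((pte & EPT_X) ? ACC_EXEC_MASK  : 0) |
                       ((pte & EPT_W) ? ACC_WRITE_MASK : 0) |
                       ((pte & EPT_R) ? ACC_USER_MASK  : 0);
        }
    
        int main(void)
        {
                uint64_t pte = EPT_R;   /* read-only guest EPT entry */
    
                /* Bits 5:3 of the exit qualification are in XWR order, so
                 * they must come from the raw PTE bits ... */
                printf("raw:       %#llx\n",
                       (unsigned long long)((pte & 0x7) << 3));
                /* ... not from the reordered ACC_* value, which would report
                 * the entry as executable instead of readable. */
                printf("reordered: %#x\n", (acc_from_xwr(pte) & 0x7) << 3);
                return 0;
        }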
    
    Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
    Reviewed-by: Xiao Guangrong <xiaoguangr...@tencent.com>
    Signed-off-by: Radim Krčmář <rkrc...@redhat.com>
    
    (cherry picked from commit 0780516a18f87e881e42ed815f189279b0a1743c)
    Signed-off-by: Jan Dakinevich <jan.dakinev...@virtuozzo.com>
    
    =====================
    Patchset description:
    
    EPT fixes and enhancements
    
    Backport of EPT fixes from upstream for
    https://jira.sw.ru/browse/PSBM-84046
    
    Bandan Das (3):
      kvm: mmu: don't set the present bit unconditionally
      kvm: mmu: track read permission explicitly for shadow EPT page tables
      kvm: vmx: advertise support for ept execute only
    
    Junaid Shahid (2):
      kvm: x86: mmu: Use symbolic constants for EPT Violation Exit
        Qualifications
      kvm: x86: mmu: Rename EPT_VIOLATION_READ/WRITE/INSTR constants
    
    KarimAllah Ahmed (2):
      kvm: Map PFN-type memory regions as writable (if possible)
      KVM: x86: Update the exit_qualification access bits while walking an
        address
    
    Paolo Bonzini (5):
      KVM: nVMX: we support 1GB EPT pages
      kvm: x86: MMU support for EPT accessed/dirty bits
      kvm: nVMX: support EPT accessed/dirty bits
      KVM: MMU: return page fault error code from permission_fault
      KVM: nVMX: fix EPT permissions as reported in exit qualification
---
 arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 176ec6b5a249..f96f2a4d5bb9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -268,11 +268,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
        pt_element_t pte;
        pt_element_t __user *uninitialized_var(ptep_user);
        gfn_t table_gfn;
-       unsigned index, pt_access, pte_access, accessed_dirty;
+       u64 pt_access, pte_access;
+       unsigned index, accessed_dirty;
        unsigned nested_access;
        gpa_t pte_gpa;
        bool have_ad;
        int offset;
+       u64 walk_nx_mask = 0;
        const int write_fault = access & PFERR_WRITE_MASK;
        const int user_fault  = access & PFERR_USER_MASK;
        const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -287,6 +289,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
        have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+       walk_nx_mask = 1ULL << PT64_NX_SHIFT;
        if (walker->level == PT32E_ROOT_LEVEL) {
                pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
                trace_kvm_mmu_paging_element(pte, walker->level);
@@ -298,8 +301,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
        walker->max_level = walker->level;
        ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-       accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
        /*
         * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
         * by the MOV to CR instruction are treated as reads and do not cause the
@@ -307,14 +308,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
         */
        nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-       pt_access = pte_access = ACC_ALL;
+       pte_access = ~0;
        ++walker->level;
 
        do {
                gfn_t real_gfn;
                unsigned long host_addr;
 
-               pt_access &= pte_access;
+               pt_access = pte_access;
                --walker->level;
 
                index = PT_INDEX(addr, walker->level);
@@ -357,6 +358,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
                trace_kvm_mmu_paging_element(pte, walker->level);
 
+               /*
+                * Inverting the NX bit lets us AND it like other
+                * permission bits.
+                */
+               pte_access = pt_access & (pte ^ walk_nx_mask);
+
                if (unlikely(!FNAME(is_present_gpte)(pte)))
                        goto error;
 
@@ -365,13 +372,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
                        goto error;
                }
 
-               accessed_dirty &= pte;
-               pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
                walker->ptes[walker->level - 1] = pte;
        } while (!is_last_gpte(mmu, walker->level, pte));
 
-       errcode = permission_fault(vcpu, mmu, pte_access, access);
+       accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+       /* Convert to ACC_*_MASK flags for struct guest_walker.  */
+       walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+       walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+       errcode = permission_fault(vcpu, mmu, walker->pte_access, access);
        if (unlikely(errcode))
                goto error;
 
@@ -388,7 +397,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
        walker->gfn = real_gpa >> PAGE_SHIFT;
 
        if (!write_fault)
-               FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+               FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
        else
                /*
                 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -406,10 +415,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
                        goto retry_walk;
        }
 
-       walker->pt_access = pt_access;
-       walker->pte_access = pte_access;
        pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-                __func__, (u64)pte, pte_access, pt_access);
+                __func__, (u64)pte, walker->pte_access, walker->pt_access);
        return 1;
 
 error:
@@ -437,7 +444,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
         */
        if (!(errcode & PFERR_RSVD_MASK)) {
                vcpu->arch.exit_qualification &= 0x187;
-               vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+               vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
        }
 #endif
        walker->fault.address = addr;
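
A side note on the walk_nx_mask XOR above (an illustrative user-space
sketch, not KVM code): for 64-bit page tables NX is a negative
permission, i.e. bit 63 set means "no execute", so raw PTEs cannot be
folded together with a plain AND the way the R/W bits can.  Flipping
the bit first turns it into a "can execute" bit that accumulates with
AND, and flipping it back after the walk (before gpte_access()) restores
the original meaning.  The NX_MASK constant below stands in for
1ULL << PT64_NX_SHIFT from the patch.

    #include <stdint.h>
    #include <stdio.h>

    #define NX_MASK (1ULL << 63)    /* stand-in for 1ULL << PT64_NX_SHIFT */

    int main(void)
    {
            /* Present+writable upper entry that allows execution (NX clear),
             * present+writable leaf entry that forbids it (NX set). */
            uint64_t ptes[] = { 0x3, 0x3 | NX_MASK };
            uint64_t pte_access = ~0ULL;
            int i;

            for (i = 0; i < 2; i++)
                    pte_access &= ptes[i] ^ NX_MASK; /* flip NX so AND accumulates it */

            /* Flip back: bit 63 set again means "not executable". */
            pte_access ^= NX_MASK;

            printf("writable: %s, executable: %s\n",
                   (pte_access & 0x2) ? "yes" : "no",
                   (pte_access & NX_MASK) ? "no" : "yes");
            return 0;
    }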