Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification

2017-05-11 Thread Xiao Guangrong



On 05/12/2017 11:59 AM, Xiao Guangrong wrote:
error:

@@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
   */
  if (!(errcode & PFERR_RSVD_MASK)) {
  vcpu->arch.exit_qualification &= 0x187;
-vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;

 ^ here, the 
original code
is buggy as pt_access and pte have different bit order, fortunately, this patch 
fixes it
too. :)



It's exactly what this patch aims at, do not know what i was thinking when
wrote these down. :(


Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification

2017-05-11 Thread Xiao Guangrong



On 05/12/2017 11:59 AM, Xiao Guangrong wrote:
error:

@@ -452,7 +459,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
   */
  if (!(errcode & PFERR_RSVD_MASK)) {
  vcpu->arch.exit_qualification &= 0x187;
-vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;

 ^ here, the 
original code
is buggy as pt_access and pte have different bit order, fortunately, this patch 
fixes it
too. :)



It's exactly what this patch aims at, do not know what i was thinking when
wrote these down. :(


Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification

2017-05-11 Thread Xiao Guangrong



On 05/11/2017 07:23 PM, Paolo Bonzini wrote:

This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.

The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX).  This results in an
incorrect exit qualification.  To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk.  This
lets us use pte_access to compute the exit qualification with XWR
bit order.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/paging_tmpl.h | 35 +--
  1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
-   unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+   u64 pt_access, pte_access;
+   unsigned index, accessed_dirty, pte_pkey;
unsigned nested_access;
gpa_t pte_gpa;
bool have_ad;
int offset;
+   u64 walk_nx_mask = 0;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault  = access & PFERR_USER_MASK;
const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
have_ad   = PT_HAVE_ACCESSED_DIRTY(mmu);
  
  #if PTTYPE == 64

+   walk_nx_mask = 1ULL << PT64_NX_SHIFT;


We can always make walk_nx_mask = 1ULL << PT64_NX_SHIFT, as:
- for EPT, this bit is useless
- for 32bit, bit 63 is always ZERO, so that the final result should be ZERO too,


if (walker->level == PT32E_ROOT_LEVEL) {
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
  
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;

-
/*
 * FIXME: on Intel processors, loads of the PDPTE registers for PAE 
paging
 * by the MOV to CR instruction are treated as reads and do not cause 
the
@@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
 */
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
  
-	pt_access = pte_access = ACC_ALL;

+   pte_access = ~0;
++walker->level;
  
  	do {

gfn_t real_gfn;
unsigned long host_addr;
  
-		pt_access &= pte_access;

+   pt_access = pte_access;
--walker->level;
  
  		index = PT_INDEX(addr, walker->level);

@@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
  
  		trace_kvm_mmu_paging_element(pte, walker->level);
  
+		/*

+* Inverting the NX it lets us AND it like other
+* permission bits.
+*/
+   pte_access = pt_access & (pte ^ walk_nx_mask);
+
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
  
@@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,

goto error;
}
  
-		accessed_dirty &= pte;

-   pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
  
  	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);

-   errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+   accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+   /* Convert to ACC_*_MASK flags for struct guest_walker.  */
+   walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+   walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ 
walk_nx_mask);
+   errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, 
access);
if (unlikely(errcode))
goto error;
  
@@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,

walker->gfn = real_gpa >> PAGE_SHIFT;
  
  	if (!write_fault)

-   FNAME(protect_clean_gpte)(mmu, _access, pte);
+   FNAME(protect_clean_gpte)(mmu, >pte_access, pte);
else
/*
 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
goto retry_walk;
}
  
-	walker->pt_access 

Re: [PATCH 1/2] KVM: nVMX: fix EPT permissions as reported in exit qualification

2017-05-11 Thread Xiao Guangrong



On 05/11/2017 07:23 PM, Paolo Bonzini wrote:

This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.

The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX).  This results in an
incorrect exit qualification.  To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk.  This
lets us use pte_access to compute the exit qualification with XWR
bit order.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/paging_tmpl.h | 35 +--
  1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
-   unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+   u64 pt_access, pte_access;
+   unsigned index, accessed_dirty, pte_pkey;
unsigned nested_access;
gpa_t pte_gpa;
bool have_ad;
int offset;
+   u64 walk_nx_mask = 0;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault  = access & PFERR_USER_MASK;
const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
have_ad   = PT_HAVE_ACCESSED_DIRTY(mmu);
  
  #if PTTYPE == 64

+   walk_nx_mask = 1ULL << PT64_NX_SHIFT;


We can always make walk_nx_mask = 1ULL << PT64_NX_SHIFT, as:
- for EPT, this bit is useless
- for 32bit, bit 63 is always ZERO, so that the final result should be ZERO too,


if (walker->level == PT32E_ROOT_LEVEL) {
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
  
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;

-
/*
 * FIXME: on Intel processors, loads of the PDPTE registers for PAE 
paging
 * by the MOV to CR instruction are treated as reads and do not cause 
the
@@ -322,14 +323,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
 */
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
  
-	pt_access = pte_access = ACC_ALL;

+   pte_access = ~0;
++walker->level;
  
  	do {

gfn_t real_gfn;
unsigned long host_addr;
  
-		pt_access &= pte_access;

+   pt_access = pte_access;
--walker->level;
  
  		index = PT_INDEX(addr, walker->level);

@@ -371,6 +372,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
  
  		trace_kvm_mmu_paging_element(pte, walker->level);
  
+		/*

+* Inverting the NX it lets us AND it like other
+* permission bits.
+*/
+   pte_access = pt_access & (pte ^ walk_nx_mask);
+
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
  
@@ -379,14 +386,16 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,

goto error;
}
  
-		accessed_dirty &= pte;

-   pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
  
  	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);

-   errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+   accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+   /* Convert to ACC_*_MASK flags for struct guest_walker.  */
+   walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+   walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ 
walk_nx_mask);
+   errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, 
access);
if (unlikely(errcode))
goto error;
  
@@ -403,7 +412,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,

walker->gfn = real_gpa >> PAGE_SHIFT;
  
  	if (!write_fault)

-   FNAME(protect_clean_gpte)(mmu, _access, pte);
+   FNAME(protect_clean_gpte)(mmu, >pte_access, pte);
else
/*
 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
goto retry_walk;
}
  
-	walker->pt_access = pt_access;

-