Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic
On 10/11/2015 10:28, Han, Huaitong wrote: > > pkru = is_long_mode(vcpu) ? read_pkru() : 0; > > if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) { > > /* ... from above ... */ > > > > /* Flip PFERR_PK_MASK if pkru_bits is non-zero */ > > pfec ^= -pkru_bits & PFERR_PK_MASK; > > If pkru_bits is zero, it means the dynamic conditions for a > protection-key violation are not met, so the PK bit in pfec should be flipped. So I > guess it should be: > pfec ^= pkru_bits ? 0 : PFERR_PK_MASK; Right. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic
On Mon, 2015-11-09 at 14:17 +0100, Paolo Bonzini wrote: > > > static inline bool permission_fault(struct kvm_vcpu *vcpu, > > > struct kvm_mmu *mmu, > > > - unsigned pte_access, > > > unsigned pfec) > > > + unsigned pte_access, unsigned pte_pkeys, > > > unsigned pfec) > > > { > > > - int cpl = kvm_x86_ops->get_cpl(vcpu); > > > - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); > > > + unsigned long smap, rflags; > > > + u32 pkru; > > > + int cpl, index; > > > + bool wf, uf, pk, pkru_ad, pkru_wd; > > > + > > > + cpl = kvm_x86_ops->get_cpl(vcpu); > > > + rflags = kvm_x86_ops->get_rflags(vcpu); > > > + > > > + pkru = read_pkru(); > > > + > > > + /* > > > + * PKRU defines 32 bits, there are 16 domains and 2 > > > attribute bits per > > > + * domain in pkru, pkey is index to a defined domain, so > > > the value of > > > + * pkey * PKRU_ATTRS + R/W is offset of a defined domain > > > attribute. > > > + */ > > > + pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) > > > & 1; > > > + pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + > > > PKRU_WRITE)) & 1; > > > + > > > + wf = pfec & PFERR_WRITE_MASK; > > > + uf = pfec & PFERR_USER_MASK; > > > + > > > + /* > > > + * PKeys 2nd and 6th conditions: > > > + * 2.EFER_LMA=1 > > > + * 6.PKRU.AD=1 > > > + * or The access is a data write and PKRU.WD=1 and > > > + * either CR0.WP=1 or it is a user mode > > > access > > > + */ > > > + pk = is_long_mode(vcpu) && (pkru_ad || > > > + (pkru_wd && wf && > > > (is_write_protection(vcpu) || uf))); > > A little more optimized: > > pkru_bits = (pkru >> (pte_pkeys * PKRU_ATTRS)) & 3; > > /* >* Ignore PKRU.WD if not relevant to this access (a read, >* or a supervisor mode access if CR0.WP=0). >*/ > if (!wf || (!uf && !is_write_protection(vcpu))) > pkru_bits &= ~(1 << PKRU_WRITE); > > ... and then just check pkru_bits != 0. > > > > + /* > > > + * PK bit right value in pfec equal to > > > + * PK bit current value in pfec and pk value. 
> > > + */ > > > + pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK; > > > > PK is only applicable to guest page tables, but if you do not > > support > > PKRU without EPT (patch 9), none of this is necessary, is it? > > Doh. :( Sorry, this is of course needed for the emulation case. > > I think you should optimize this for the common case where pkru is > zero, > hence pk will always be zero. So something like > > pkru = is_long_mode(vcpu) ? read_pkru() : 0; > if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) { > /* ... from above ... */ > > /* Flip PFERR_PK_MASK if pkru_bits is non-zero */ > pfec ^= -pkru_bits & PFERR_PK_MASK; If pkru_bits is zero, it means the dynamic conditions for a protection-key violation are not met, so the PK bit in pfec should be flipped. So I guess it should be: pfec ^= pkru_bits ? 0 : PFERR_PK_MASK; > } > > Paolo >
Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic
On 09/11/2015 13:43, Paolo Bonzini wrote: > > > On 09/11/2015 12:54, Huaitong Han wrote: >> Protection keys define a new 4-bit protection key field (PKEY) in bits >> 62:59 of leaf entries of the page tables, the PKEY is an index to PKRU >> register(16 domains), every domain has 2 bits(write disable bit, access >> disable bit). >> >> Static logic has been produced in update_permission_bitmask, dynamic logic >> need read pkey from page table entries, get pkru value, and deduce the >> correct result. >> >> Signed-off-by: Huaitong Han >> >> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h >> index e4202e4..bbb 100644 >> --- a/arch/x86/kvm/mmu.h >> +++ b/arch/x86/kvm/mmu.h >> @@ -3,6 +3,7 @@ >> >> #include >> #include "kvm_cache_regs.h" >> +#include "x86.h" >> >> #define PT64_PT_BITS 9 >> #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) >> @@ -24,6 +25,11 @@ >> #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) >> #define PT_PAT_MASK (1ULL << 7) >> #define PT_GLOBAL_MASK (1ULL << 8) >> + >> +#define PT64_PKEY_BIT0 (1ULL << _PAGE_BIT_PKEY_BIT0) >> +#define PT64_PKEY_BIT1 (1ULL << _PAGE_BIT_PKEY_BIT1) >> +#define PT64_PKEY_BIT2 (1ULL << _PAGE_BIT_PKEY_BIT2) >> +#define PT64_PKEY_BIT3 (1ULL << _PAGE_BIT_PKEY_BIT3) >> #define PT64_NX_SHIFT 63 >> #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) >> >> @@ -45,6 +51,15 @@ >> #define PT_PAGE_TABLE_LEVEL 1 >> #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1) >> >> +#define PKEYS_BIT0_VALUE (1ULL << 0) >> +#define PKEYS_BIT1_VALUE (1ULL << 1) >> +#define PKEYS_BIT2_VALUE (1ULL << 2) >> +#define PKEYS_BIT3_VALUE (1ULL << 3) >> + >> +#define PKRU_READ 0 >> +#define PKRU_WRITE 1 >> +#define PKRU_ATTRS 2 >> + >> static inline u64 rsvd_bits(int s, int e) >> { >> return ((1ULL << (e - s + 1)) - 1) << s; >> @@ -145,10 +160,44 @@ static inline bool is_write_protection(struct kvm_vcpu >> *vcpu) >> * fault with the given access (in ACC_* format)? 
>> */ >> static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu >> *mmu, >> -unsigned pte_access, unsigned pfec) >> +unsigned pte_access, unsigned pte_pkeys, unsigned pfec) >> { >> -int cpl = kvm_x86_ops->get_cpl(vcpu); >> -unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); >> +unsigned long smap, rflags; >> +u32 pkru; >> +int cpl, index; >> +bool wf, uf, pk, pkru_ad, pkru_wd; >> + >> +cpl = kvm_x86_ops->get_cpl(vcpu); >> +rflags = kvm_x86_ops->get_rflags(vcpu); >> + >> +pkru = read_pkru(); >> + >> +/* >> +* PKRU defines 32 bits, there are 16 domains and 2 attribute bits per >> +* domain in pkru, pkey is index to a defined domain, so the value of >> +* pkey * PKRU_ATTRS + R/W is offset of a defined domain attribute. >> +*/ >> +pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) & 1; >> +pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_WRITE)) & 1; >> + >> +wf = pfec & PFERR_WRITE_MASK; >> +uf = pfec & PFERR_USER_MASK; >> + >> +/* >> +* PKeys 2nd and 6th conditions: >> +* 2.EFER_LMA=1 >> +* 6.PKRU.AD=1 >> +* or The access is a data write and PKRU.WD=1 and >> +* either CR0.WP=1 or it is a user mode access >> +*/ >> +pk = is_long_mode(vcpu) && (pkru_ad || >> +(pkru_wd && wf && (is_write_protection(vcpu) || uf))); A little more optimized: pkru_bits = (pkru >> (pte_pkeys * PKRU_ATTRS)) & 3; /* * Ignore PKRU.WD if not relevant to this access (a read, * or a supervisor mode access if CR0.WP=0). */ if (!wf || (!uf && !is_write_protection(vcpu))) pkru_bits &= ~(1 << PKRU_WRITE); ... and then just check pkru_bits != 0. >> +/* >> +* PK bit right value in pfec equal to >> +* PK bit current value in pfec and pk value. >> +*/ >> +pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK; > > PK is only applicable to guest page tables, but if you do not support > PKRU without EPT (patch 9), none of this is necessary, is it? Doh. :( Sorry, this is of course needed for the emulation case. 
I think you should optimize this for the common case where pkru is zero, hence pk will always be zero. So something like pkru = is_long_mode(vcpu) ? read_pkru() : 0; if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) { /* ... from above ... */ /* Flip PFERR_PK_MASK if pkru_bits is non-zero */ pfec ^= -pkru_bits & PFERR_PK_MASK; } Paolo >> /* >> * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1. >> @@ -163,8 +212,8 @@ static inline bool permission_fault(struct kvm_vcpu >> *vcpu, struct kvm_mmu *mmu, >> * but it will be one in index if SMAP checks are being overridden. >> * It is important to keep this branchless. >> */ >> -unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); >> -int i
Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic
On 09/11/2015 12:54, Huaitong Han wrote: > Protection keys define a new 4-bit protection key field (PKEY) in bits > 62:59 of leaf entries of the page tables, the PKEY is an index to PKRU > register(16 domains), every domain has 2 bits(write disable bit, access > disable bit). > > Static logic has been produced in update_permission_bitmask, dynamic logic > need read pkey from page table entries, get pkru value, and deduce the > correct result. > > Signed-off-by: Huaitong Han > > diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h > index e4202e4..bbb 100644 > --- a/arch/x86/kvm/mmu.h > +++ b/arch/x86/kvm/mmu.h > @@ -3,6 +3,7 @@ > > #include > #include "kvm_cache_regs.h" > +#include "x86.h" > > #define PT64_PT_BITS 9 > #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) > @@ -24,6 +25,11 @@ > #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) > #define PT_PAT_MASK (1ULL << 7) > #define PT_GLOBAL_MASK (1ULL << 8) > + > +#define PT64_PKEY_BIT0 (1ULL << _PAGE_BIT_PKEY_BIT0) > +#define PT64_PKEY_BIT1 (1ULL << _PAGE_BIT_PKEY_BIT1) > +#define PT64_PKEY_BIT2 (1ULL << _PAGE_BIT_PKEY_BIT2) > +#define PT64_PKEY_BIT3 (1ULL << _PAGE_BIT_PKEY_BIT3) > #define PT64_NX_SHIFT 63 > #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) > > @@ -45,6 +51,15 @@ > #define PT_PAGE_TABLE_LEVEL 1 > #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1) > > +#define PKEYS_BIT0_VALUE (1ULL << 0) > +#define PKEYS_BIT1_VALUE (1ULL << 1) > +#define PKEYS_BIT2_VALUE (1ULL << 2) > +#define PKEYS_BIT3_VALUE (1ULL << 3) > + > +#define PKRU_READ 0 > +#define PKRU_WRITE 1 > +#define PKRU_ATTRS 2 > + > static inline u64 rsvd_bits(int s, int e) > { > return ((1ULL << (e - s + 1)) - 1) << s; > @@ -145,10 +160,44 @@ static inline bool is_write_protection(struct kvm_vcpu > *vcpu) > * fault with the given access (in ACC_* format)? 
> */ > static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu > *mmu, > - unsigned pte_access, unsigned pfec) > + unsigned pte_access, unsigned pte_pkeys, unsigned pfec) > { > - int cpl = kvm_x86_ops->get_cpl(vcpu); > - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); > + unsigned long smap, rflags; > + u32 pkru; > + int cpl, index; > + bool wf, uf, pk, pkru_ad, pkru_wd; > + > + cpl = kvm_x86_ops->get_cpl(vcpu); > + rflags = kvm_x86_ops->get_rflags(vcpu); > + > + pkru = read_pkru(); > + > + /* > + * PKRU defines 32 bits, there are 16 domains and 2 attribute bits per > + * domain in pkru, pkey is index to a defined domain, so the value of > + * pkey * PKRU_ATTRS + R/W is offset of a defined domain attribute. > + */ > + pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) & 1; > + pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_WRITE)) & 1; > + > + wf = pfec & PFERR_WRITE_MASK; > + uf = pfec & PFERR_USER_MASK; > + > + /* > + * PKeys 2nd and 6th conditions: > + * 2.EFER_LMA=1 > + * 6.PKRU.AD=1 > + * or The access is a data write and PKRU.WD=1 and > + * either CR0.WP=1 or it is a user mode access > + */ > + pk = is_long_mode(vcpu) && (pkru_ad || > + (pkru_wd && wf && (is_write_protection(vcpu) || uf))); > + > + /* > + * PK bit right value in pfec equal to > + * PK bit current value in pfec and pk value. > + */ > + pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK; PK is only applicable to guest page tables, but if you do not support PKRU without EPT (patch 9), none of this is necessary, is it? > /* >* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1. > @@ -163,8 +212,8 @@ static inline bool permission_fault(struct kvm_vcpu > *vcpu, struct kvm_mmu *mmu, >* but it will be one in index if SMAP checks are being overridden. >* It is important to keep this branchless. 
>*/ > - unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); > - int index = (pfec >> 1) + > + smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); > + index = (pfec >> 1) + > (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); > > WARN_ON(pfec & PFERR_RSVD_MASK); > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 736e6ab..99563bc 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -253,6 +253,17 @@ static int FNAME(update_accessed_dirty_bits)(struct > kvm_vcpu *vcpu, > } > return 0; > } > +static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) > +{ > + unsigned pkeys = 0; > +#if PTTYPE == 64 > + pkeys = ((gpte & PT64_PKEY_BIT0) ? PKEYS_BIT0_VALUE : 0) | > + ((gpte & PT64_PKEY_BIT1) ? PKEYS_BIT1_VALUE : 0) | > + ((gpte & PT64_PKEY_BIT2) ? PKEYS_BIT2_VALUE : 0) | > + ((gpte & PT64_PKEY_BIT3) ? PKEYS_BIT3_VALUE : 0); This is just pkeys = (gpte >> _PAGE_BIT_PKEY