Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic

2015-11-10 Thread Han, Huaitong
On Mon, 2015-11-09 at 14:17 +0100, Paolo Bonzini wrote:

> > >  static inline bool permission_fault(struct kvm_vcpu *vcpu,
> > > struct kvm_mmu *mmu,
> > > - unsigned pte_access,
> > > unsigned pfec)
> > > + unsigned pte_access, unsigned pte_pkeys,
> > > unsigned pfec)
> > >  {
> > > - int cpl = kvm_x86_ops->get_cpl(vcpu);
> > > - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> > > + unsigned long smap, rflags;
> > > + u32 pkru;
> > > + int cpl, index;
> > > + bool wf, uf, pk, pkru_ad, pkru_wd;
> > > +
> > > + cpl = kvm_x86_ops->get_cpl(vcpu);
> > > + rflags = kvm_x86_ops->get_rflags(vcpu);
> > > +
> > > + pkru = read_pkru();
> > > +
> > > + /*
> > > + * PKRU defines 32 bits, there are 16 domains and 2
> > > attribute bits per
> > > + * domain in pkru, pkey is index to a defined domain, so
> > > the value of
> > > + * pkey * PKRU_ATTRS + R/W is offset of a defined domain
> > > attribute.
> > > + */
> > > + pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ))
> > > & 1;
> > > + pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS +
> > > PKRU_WRITE)) & 1;
> > > +
> > > + wf = pfec & PFERR_WRITE_MASK;
> > > + uf = pfec & PFERR_USER_MASK;
> > > +
> > > + /*
> > > + * PKeys 2nd and 6th conditions:
> > > + * 2.EFER_LMA=1
> > > + * 6.PKRU.AD=1
> > > + *   or The access is a data write and PKRU.WD=1 and
> > > + *   either CR0.WP=1 or it is a user mode
> > > access
> > > + */
> > > + pk = is_long_mode(vcpu) && (pkru_ad ||
> > > + (pkru_wd && wf &&
> > > (is_write_protection(vcpu) || uf)));
> 
> A little more optimized:
> 
>   pkru_bits = (pkru >> (pte_pkeys * PKRU_ATTRS)) & 3;
> 
>   /*
>* Ignore PKRU.WD if not relevant to this access (a read,
>* or a supervisor mode access if CR0.WP=0).
>*/
>   if (!wf || (!uf && !is_write_protection(vcpu)))
>   pkru_bits &= ~(1 << PKRU_WRITE);
> 
> ... and then just check pkru_bits != 0.
> 
> > > + /*
> > > + * PK bit right value in pfec equal to
> > > + * PK bit current value in pfec and pk value.
> > > + */
> > > + pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK;
> > 
> > PK is only applicable to guest page tables, but if you do not
> > support
> > PKRU without EPT (patch 9), none of this is necessary, is it?
> 
> Doh. :(  Sorry, this is of course needed for the emulation case.
> 
> I think you should optimize this for the common case where pkru is
> zero,
> hence pk will always be zero.  So something like
> 
>   pkru = is_long_mode(vcpu) ? read_pkru() : 0;
>   if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) {
>   ... from above ... */
> 
>   /* Flip PFERR_PK_MASK if pkru_bits is non-zero */
>   pfec ^= -pkru_bits & PFERR_PK_MASK;

If pkru_bits is zero, it means the dynamic conditions for a
protection-key violation are not met, so the PK bit in pfec should be
flipped. So I guess it should be:
pfec ^= pkru_bits ? 0 : PFERR_PK_MASK;

>   }
> 
> Paolo
> 


Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic

2015-11-10 Thread Paolo Bonzini


On 10/11/2015 10:28, Han, Huaitong wrote:
> > pkru = is_long_mode(vcpu) ? read_pkru() : 0;
> > if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) {
> > ... from above ... */
> > 
> > /* Flip PFERR_PK_MASK if pkru_bits is non-zero */
> > pfec ^= -pkru_bits & PFERR_PK_MASK;
> 
> If pkru_bits is zero, it means the dynamic conditions for a
> protection-key violation are not met, so the PK bit in pfec should be
> flipped. So I guess it should be:
>   pfec ^= pkru_bits ? 0 : PFERR_PK_MASK;

Right.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic

2015-11-09 Thread Paolo Bonzini


On 09/11/2015 12:54, Huaitong Han wrote:
> Protection keys define a new 4-bit protection key field (PKEY) in bits
> 62:59 of leaf entries of the page tables, the PKEY is an index to PKRU
> register(16 domains), every domain has 2 bits(write disable bit, access
> disable bit).
> 
> Static logic has been produced in update_permission_bitmask, dynamic logic
> need read pkey from page table entries, get pkru value, and deduce the
> correct result.
> 
> Signed-off-by: Huaitong Han 
> 
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index e4202e4..bbb 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -3,6 +3,7 @@
>  
>  #include 
>  #include "kvm_cache_regs.h"
> +#include "x86.h"
>  
>  #define PT64_PT_BITS 9
>  #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
> @@ -24,6 +25,11 @@
>  #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
>  #define PT_PAT_MASK (1ULL << 7)
>  #define PT_GLOBAL_MASK (1ULL << 8)
> +
> +#define PT64_PKEY_BIT0 (1ULL << _PAGE_BIT_PKEY_BIT0)
> +#define PT64_PKEY_BIT1 (1ULL << _PAGE_BIT_PKEY_BIT1)
> +#define PT64_PKEY_BIT2 (1ULL << _PAGE_BIT_PKEY_BIT2)
> +#define PT64_PKEY_BIT3 (1ULL << _PAGE_BIT_PKEY_BIT3)
>  #define PT64_NX_SHIFT 63
>  #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
>  
> @@ -45,6 +51,15 @@
>  #define PT_PAGE_TABLE_LEVEL 1
>  #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
>  
> +#define PKEYS_BIT0_VALUE (1ULL << 0)
> +#define PKEYS_BIT1_VALUE (1ULL << 1)
> +#define PKEYS_BIT2_VALUE (1ULL << 2)
> +#define PKEYS_BIT3_VALUE (1ULL << 3)
> +
> +#define PKRU_READ   0
> +#define PKRU_WRITE  1
> +#define PKRU_ATTRS  2
> +
>  static inline u64 rsvd_bits(int s, int e)
>  {
>   return ((1ULL << (e - s + 1)) - 1) << s;
> @@ -145,10 +160,44 @@ static inline bool is_write_protection(struct kvm_vcpu 
> *vcpu)
>   * fault with the given access (in ACC_* format)?
>   */
>  static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu 
> *mmu,
> - unsigned pte_access, unsigned pfec)
> + unsigned pte_access, unsigned pte_pkeys, unsigned pfec)
>  {
> - int cpl = kvm_x86_ops->get_cpl(vcpu);
> - unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> + unsigned long smap, rflags;
> + u32 pkru;
> + int cpl, index;
> + bool wf, uf, pk, pkru_ad, pkru_wd;
> +
> + cpl = kvm_x86_ops->get_cpl(vcpu);
> + rflags = kvm_x86_ops->get_rflags(vcpu);
> +
> + pkru = read_pkru();
> +
> + /*
> + * PKRU defines 32 bits, there are 16 domains and 2 attribute bits per
> + * domain in pkru, pkey is index to a defined domain, so the value of
> + * pkey * PKRU_ATTRS + R/W is offset of a defined domain attribute.
> + */
> + pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) & 1;
> + pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_WRITE)) & 1;
> +
> + wf = pfec & PFERR_WRITE_MASK;
> + uf = pfec & PFERR_USER_MASK;
> +
> + /*
> + * PKeys 2nd and 6th conditions:
> + * 2.EFER_LMA=1
> + * 6.PKRU.AD=1
> + *   or The access is a data write and PKRU.WD=1 and
> + *   either CR0.WP=1 or it is a user mode access
> + */
> + pk = is_long_mode(vcpu) && (pkru_ad ||
> + (pkru_wd && wf && (is_write_protection(vcpu) || uf)));
> +
> + /*
> + * PK bit right value in pfec equal to
> + * PK bit current value in pfec and pk value.
> + */
> + pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK;

PK is only applicable to guest page tables, but if you do not support
PKRU without EPT (patch 9), none of this is necessary, is it?

>   /*
>* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
> @@ -163,8 +212,8 @@ static inline bool permission_fault(struct kvm_vcpu 
> *vcpu, struct kvm_mmu *mmu,
>* but it will be one in index if SMAP checks are being overridden.
>* It is important to keep this branchless.
>*/
> - unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> - int index = (pfec >> 1) +
> + smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> + index = (pfec >> 1) +
>   (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
>  
>   WARN_ON(pfec & PFERR_RSVD_MASK);
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 736e6ab..99563bc 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -253,6 +253,17 @@ static int FNAME(update_accessed_dirty_bits)(struct 
> kvm_vcpu *vcpu,
>   }
>   return 0;
>  }
> +static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
> +{
> + unsigned pkeys = 0;
> +#if PTTYPE == 64
> + pkeys = ((gpte & PT64_PKEY_BIT0) ? PKEYS_BIT0_VALUE : 0) |
> + ((gpte & PT64_PKEY_BIT1) ? PKEYS_BIT1_VALUE : 0) |
> + ((gpte & PT64_PKEY_BIT2) ? PKEYS_BIT2_VALUE : 0) |
> + ((gpte & PT64_PKEY_BIT3) ? PKEYS_BIT3_VALUE : 0);

This is just pkeys 

Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic

2015-11-09 Thread Paolo Bonzini


On 09/11/2015 13:43, Paolo Bonzini wrote:
> 
> 
> On 09/11/2015 12:54, Huaitong Han wrote:
>> Protection keys define a new 4-bit protection key field (PKEY) in bits
>> 62:59 of leaf entries of the page tables, the PKEY is an index to PKRU
>> register(16 domains), every domain has 2 bits(write disable bit, access
>> disable bit).
>>
>> Static logic has been produced in update_permission_bitmask, dynamic logic
>> need read pkey from page table entries, get pkru value, and deduce the
>> correct result.
>>
>> Signed-off-by: Huaitong Han 
>>
>> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
>> index e4202e4..bbb 100644
>> --- a/arch/x86/kvm/mmu.h
>> +++ b/arch/x86/kvm/mmu.h
>> @@ -3,6 +3,7 @@
>>  
>>  #include 
>>  #include "kvm_cache_regs.h"
>> +#include "x86.h"
>>  
>>  #define PT64_PT_BITS 9
>>  #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
>> @@ -24,6 +25,11 @@
>>  #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
>>  #define PT_PAT_MASK (1ULL << 7)
>>  #define PT_GLOBAL_MASK (1ULL << 8)
>> +
>> +#define PT64_PKEY_BIT0 (1ULL << _PAGE_BIT_PKEY_BIT0)
>> +#define PT64_PKEY_BIT1 (1ULL << _PAGE_BIT_PKEY_BIT1)
>> +#define PT64_PKEY_BIT2 (1ULL << _PAGE_BIT_PKEY_BIT2)
>> +#define PT64_PKEY_BIT3 (1ULL << _PAGE_BIT_PKEY_BIT3)
>>  #define PT64_NX_SHIFT 63
>>  #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
>>  
>> @@ -45,6 +51,15 @@
>>  #define PT_PAGE_TABLE_LEVEL 1
>>  #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
>>  
>> +#define PKEYS_BIT0_VALUE (1ULL << 0)
>> +#define PKEYS_BIT1_VALUE (1ULL << 1)
>> +#define PKEYS_BIT2_VALUE (1ULL << 2)
>> +#define PKEYS_BIT3_VALUE (1ULL << 3)
>> +
>> +#define PKRU_READ   0
>> +#define PKRU_WRITE  1
>> +#define PKRU_ATTRS  2
>> +
>>  static inline u64 rsvd_bits(int s, int e)
>>  {
>>  return ((1ULL << (e - s + 1)) - 1) << s;
>> @@ -145,10 +160,44 @@ static inline bool is_write_protection(struct kvm_vcpu 
>> *vcpu)
>>   * fault with the given access (in ACC_* format)?
>>   */
>>  static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu 
>> *mmu,
>> -unsigned pte_access, unsigned pfec)
>> +unsigned pte_access, unsigned pte_pkeys, unsigned pfec)
>>  {
>> -int cpl = kvm_x86_ops->get_cpl(vcpu);
>> -unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
>> +unsigned long smap, rflags;
>> +u32 pkru;
>> +int cpl, index;
>> +bool wf, uf, pk, pkru_ad, pkru_wd;
>> +
>> +cpl = kvm_x86_ops->get_cpl(vcpu);
>> +rflags = kvm_x86_ops->get_rflags(vcpu);
>> +
>> +pkru = read_pkru();
>> +
>> +/*
>> +* PKRU defines 32 bits, there are 16 domains and 2 attribute bits per
>> +* domain in pkru, pkey is index to a defined domain, so the value of
>> +* pkey * PKRU_ATTRS + R/W is offset of a defined domain attribute.
>> +*/
>> +pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) & 1;
>> +pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_WRITE)) & 1;
>> +
>> +wf = pfec & PFERR_WRITE_MASK;
>> +uf = pfec & PFERR_USER_MASK;
>> +
>> +/*
>> +* PKeys 2nd and 6th conditions:
>> +* 2.EFER_LMA=1
>> +* 6.PKRU.AD=1
>> +*   or The access is a data write and PKRU.WD=1 and
>> +*   either CR0.WP=1 or it is a user mode access
>> +*/
>> +pk = is_long_mode(vcpu) && (pkru_ad ||
>> +(pkru_wd && wf && (is_write_protection(vcpu) || uf)));

A little more optimized:

pkru_bits = (pkru >> (pte_pkeys * PKRU_ATTRS)) & 3;

/*
 * Ignore PKRU.WD if not relevant to this access (a read,
 * or a supervisor mode access if CR0.WP=0).
 */
if (!wf || (!uf && !is_write_protection(vcpu)))
pkru_bits &= ~(1 << PKRU_WRITE);

... and then just check pkru_bits != 0.

>> +/*
>> +* PK bit right value in pfec equal to
>> +* PK bit current value in pfec and pk value.
>> +*/
>> +pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK;
> 
> PK is only applicable to guest page tables, but if you do not support
> PKRU without EPT (patch 9), none of this is necessary, is it?

Doh. :(  Sorry, this is of course needed for the emulation case.

I think you should optimize this for the common case where pkru is zero,
hence pk will always be zero.  So something like

pkru = is_long_mode(vcpu) ? read_pkru() : 0;
if (unlikely(pkru) && (pfec & PFERR_PK_MASK)) {
... from above ... */

/* Flip PFERR_PK_MASK if pkru_bits is non-zero */
pfec ^= -pkru_bits & PFERR_PK_MASK;
}

Paolo

>>  /*
>>   * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
>> @@ -163,8 +212,8 @@ static inline bool permission_fault(struct kvm_vcpu 
>> *vcpu, struct kvm_mmu *mmu,
>>   * but it will be one in index if SMAP checks are being overridden.
>>   * It is important to keep this branchless.
>>   */
>> -unsigned long smap = (cpl - 3) & (rflags &