Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-12 Thread Quentin Perret
On Friday 12 Mar 2021 at 11:18:05 (+), Will Deacon wrote:
> On Fri, Mar 12, 2021 at 10:13:26AM +, Quentin Perret wrote:
> > On Friday 12 Mar 2021 at 09:32:06 (+), Will Deacon wrote:
> > > I'm not saying to use the VMID directly, just that allocating half of the
> > > pte feels a bit OTT given that the state of things after this patch series
> > > is that we're using exactly 1 bit.
> > 
> > Right, and that was the reason for the PROT_NONE approach in the
> > previous version, but we agreed it'd be worth generalizing to allow for
> > future use-cases :-)
> 
> Yeah, just generalising to 32 bits feels like going too far! I dunno,
> make it a u8 for now, or define the hypervisor owner ID as 1 and reject
> owners greater than that? We can easily extend it later.

Alrighty I'll do _both_

> > > > > > @@ -517,28 +543,36 @@ static int stage2_map_walker_try_leaf(u64 
> > > > > > addr, u64 end, u32 level,
> > > > > > if (!kvm_block_mapping_supported(addr, end, phys, level))
> > > > > > return -E2BIG;
> > > > > >  
> > > > > > -   new = kvm_init_valid_leaf_pte(phys, data->attr, level);
> > > > > > -   if (kvm_pte_valid(old)) {
> > > > > > +   if (kvm_pte_valid(data->attr))
> > > > > 
> > > > > This feels like a bit of a hack to me: the 'attr' field in 
> > > > > stage2_map_data
> > > > > is intended to correspond directly to the lower/upper attributes of 
> > > > > the
> > > > > descriptor as per the architecture, so tagging the valid bit in there 
> > > > > is
> > > > > pretty grotty. However, I can see the significant advantage in being 
> > > > > able
> > > > > to re-use the stage2_map_walker functionality, so about instead of 
> > > > > nobbling
> > > > > attr, you set phys to something invalid instead, e.g.:
> > > > > 
> > > > >   #define KVM_PHYS_SET_OWNER  (-1ULL)
> > > > 
> > > > That'll confuse kvm_block_mapping_supported() and friends I think, at
> > > > least in their current form. If you _really_ don't like this, maybe we
> > > > could have an extra 'flags' field in stage2_map_data?
> > > 
> > > I was pondering this last night and I thought of two ways to do it:
> > > 
> > > 1. Add a 'bool valid' and then stick the owner and the phys in a union.
> > >(yes, you'll need to update the block mapping checks to look at the
> > > valid flag)
> > 
> > Right, though that is also used for the hyp s1 which doesn't use any of
> > this ATM. That shouldn't be too bad to change, I'll have a look.
> 
> Oh, I meant stick the bool in the stage2_map_data so that should be limited
> to the stage2 path.

I mean I still want to use kvm_block_mapping_supported() but ignore the
phys check when it's not valid. I find it ugly to add a 'valid'
parameter to the function itself, so maybe we're better off with just
special casing phys == -1ULL as you first suggested. How much do you hate
the below (totally untested)?

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 8e4599256969..9ec937462fd6 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -71,6 +71,13 @@ static u64 kvm_granule_size(u32 level)
return BIT(kvm_granule_shift(level));
 }

+#define KVM_PHYS_INVALID (-1ULL)
+
+static bool kvm_phys_is_valid(u64 phys)
+{
+   return phys != KVM_PHYS_INVALID;
+}
+
 static bool kvm_level_support_block_mappings(u32 level)
 {
/*
@@ -90,7 +97,10 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, 
u64 phys, u32 level)
if (granule > (end - addr))
return false;

-   return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
+   if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
+   return false;
+
+   return IS_ALIGNED(addr, granule);
 }

 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
@@ -550,7 +560,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, 
u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return -E2BIG;

-   if (kvm_pte_valid(data->attr))
+   if (kvm_phys_is_valid(phys))
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
else
new = kvm_init_invalid_leaf_owner(data->owner_id);
@@ -580,7 +590,8 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, 
u32 level,
smp_store_release(ptep, new);
if (stage2_pte_is_counted(new))
mm_ops->get_page(ptep);
-   data->phys += granule;
+   if (kvm_phys_is_valid(phys))
+   data->phys += granule;
return 0;
 }

@@ -739,9 +750,6 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
addr, u64 size,
if (ret)
return ret;

-   /* Set the valid flag to distinguish with the set_owner() path. */
-   map_data.attr |= KVM_PTE_VALID;
-
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
dsb(ishst);
return ret;
@@ -752,6 +760,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_

Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-12 Thread Will Deacon
On Fri, Mar 12, 2021 at 10:13:26AM +, Quentin Perret wrote:
> On Friday 12 Mar 2021 at 09:32:06 (+), Will Deacon wrote:
> > I'm not saying to use the VMID directly, just that allocating half of the
> > pte feels a bit OTT given that the state of things after this patch series
> > is that we're using exactly 1 bit.
> 
> Right, and that was the reason for the PROT_NONE approach in the
> previous version, but we agreed it'd be worth generalizing to allow for
> future use-cases :-)

Yeah, just generalising to 32 bits feels like going too far! I dunno,
make it a u8 for now, or define the hypervisor owner ID as 1 and reject
owners greater than that? We can easily extend it later.

> > > > > @@ -517,28 +543,36 @@ static int stage2_map_walker_try_leaf(u64 addr, 
> > > > > u64 end, u32 level,
> > > > >   if (!kvm_block_mapping_supported(addr, end, phys, level))
> > > > >   return -E2BIG;
> > > > >  
> > > > > - new = kvm_init_valid_leaf_pte(phys, data->attr, level);
> > > > > - if (kvm_pte_valid(old)) {
> > > > > + if (kvm_pte_valid(data->attr))
> > > > 
> > > > This feels like a bit of a hack to me: the 'attr' field in 
> > > > stage2_map_data
> > > > is intended to correspond directly to the lower/upper attributes of the
> > > > descriptor as per the architecture, so tagging the valid bit in there is
> > > > pretty grotty. However, I can see the significant advantage in being 
> > > > able
> > > > to re-use the stage2_map_walker functionality, so about instead of 
> > > > nobbling
> > > > attr, you set phys to something invalid instead, e.g.:
> > > > 
> > > > #define KVM_PHYS_SET_OWNER  (-1ULL)
> > > 
> > > That'll confuse kvm_block_mapping_supported() and friends I think, at
> > > least in their current form. If you _really_ don't like this, maybe we
> > > could have an extra 'flags' field in stage2_map_data?
> > 
> > I was pondering this last night and I thought of two ways to do it:
> > 
> > 1. Add a 'bool valid' and then stick the owner and the phys in a union.
> >(yes, you'll need to update the block mapping checks to look at the
> > valid flag)
> 
> Right, though that is also used for the hyp s1 which doesn't use any of
> this ATM. That shouldn't be too bad to change, I'll have a look.

Oh, I meant stick the bool in the stage2_map_data so that should be limited
to the stage2 path.

Will


Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-12 Thread Quentin Perret
On Friday 12 Mar 2021 at 09:32:06 (+), Will Deacon wrote:
> I'm not saying to use the VMID directly, just that allocating half of the
> pte feels a bit OTT given that the state of things after this patch series
> is that we're using exactly 1 bit.

Right, and that was the reason for the PROT_NONE approach in the
previous version, but we agreed it'd be worth generalizing to allow for
future use-cases :-)

> > > > @@ -517,28 +543,36 @@ static int stage2_map_walker_try_leaf(u64 addr, 
> > > > u64 end, u32 level,
> > > > if (!kvm_block_mapping_supported(addr, end, phys, level))
> > > > return -E2BIG;
> > > >  
> > > > -   new = kvm_init_valid_leaf_pte(phys, data->attr, level);
> > > > -   if (kvm_pte_valid(old)) {
> > > > +   if (kvm_pte_valid(data->attr))
> > > 
> > > This feels like a bit of a hack to me: the 'attr' field in stage2_map_data
> > > is intended to correspond directly to the lower/upper attributes of the
> > > descriptor as per the architecture, so tagging the valid bit in there is
> > > pretty grotty. However, I can see the significant advantage in being able
> > > to re-use the stage2_map_walker functionality, so about instead of 
> > > nobbling
> > > attr, you set phys to something invalid instead, e.g.:
> > > 
> > >   #define KVM_PHYS_SET_OWNER  (-1ULL)
> > 
> > That'll confuse kvm_block_mapping_supported() and friends I think, at
> > least in their current form. If you _really_ don't like this, maybe we
> > could have an extra 'flags' field in stage2_map_data?
> 
> I was pondering this last night and I thought of two ways to do it:
> 
> 1. Add a 'bool valid' and then stick the owner and the phys in a union.
>(yes, you'll need to update the block mapping checks to look at the
> valid flag)

Right, though that is also used for the hyp s1 which doesn't use any of
this ATM. That shouldn't be too bad to change, I'll have a look.

> 2. Go with my latter suggestion:
> 
> > > Is there ever a reason to use kvm_pgtable_stage2_set_owner() to set an
> > > owner of 0, or should you just use the map/unmap APIs for that? If so,
> > > then maybe the key is simply if owner_id is non-zero, then an invalid
> > > entry is installed?
> > 
> > I couldn't find a good reason to restrict it, as that wouldn't change
> > the implementation much anyway. Also, if we added the right CMOs, we
> > could probably remove the unmap walker and re-express it in terms of
> > set_owner(0) ... But I suppose that is for later :-)
> 
> The idea being that if owner is 0, then we install a mapping for phys, but
> if owner is !0 then we set the invalid mapping.

And I could even special-case set_owner(0) by calling unmap under the
hood so the caller doesn't need to care, but it's a bit yuck.

> (1) is probably the less hacky option... what do you reckon?

Agreed, (1) is a bit nicer. I was also considering setting phys = BIT(63)
in the set_owner() path. That makes it obvious it is an invalid PA, and
it should retain the nice alignment properties I need. But I suppose an
explicit flag makes it easier to reason about, so I'll have a go at it.

Thanks,
Quentin


Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-12 Thread Will Deacon
On Fri, Mar 12, 2021 at 06:23:00AM +, Quentin Perret wrote:
> On Thursday 11 Mar 2021 at 18:38:36 (+), Will Deacon wrote:
> > On Wed, Mar 10, 2021 at 05:57:45PM +, Quentin Perret wrote:
> > > As the host stage 2 will be identity mapped, all the .hyp memory regions
> > > and/or memory pages donated to protected guestis will have to marked
> > > invalid in the host stage 2 page-table. At the same time, the hypervisor
> > > will need a way to track the ownership of each physical page to ensure
> > > memory sharing or donation between entities (host, guests, hypervisor) is
> > > legal.
> > > 
> > > In order to enable this tracking at EL2, let's use the host stage 2
> > > page-table itself. The idea is to use the top bits of invalid mappings
> > > to store the unique identifier of the page owner. The page-table owner
> > > (the host) gets identifier 0 such that, at boot time, it owns the entire
> > > IPA space as the pgd starts zeroed.
> > > 
> > > Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> > > ownership of pages in the host stage 2. It re-uses most of the map()
> > > logic, but ends up creating invalid mappings instead. This impacts
> > > how we do refcount as we now need to count invalid mappings when they
> > > are used for ownership tracking.
> > > 
> > > Signed-off-by: Quentin Perret 
> > > ---
> > >  arch/arm64/include/asm/kvm_pgtable.h | 21 +++
> > >  arch/arm64/kvm/hyp/pgtable.c | 92 
> > >  2 files changed, 101 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> > > b/arch/arm64/include/asm/kvm_pgtable.h
> > > index 4ae19247837b..b09af4612656 100644
> > > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > > @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, 
> > > u64 addr, u64 size,
> > >  u64 phys, enum kvm_pgtable_prot prot,
> > >  void *mc);
> > >  
> > > +/**
> > > + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with 
> > > metadata
> > > + *   encoding the ownership of a page in 
> > > the
> > > + *   IPA space.
> > > + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> > > + * @addr:Intermediate physical address at which to place the 
> > > annotation.
> > 
> > This confused me a bit, as the annotation is stored in the page-table, not
> > at the memory identified by @addr. How about:
> > 
> >   "Base intermediate physical address to annotate"
> > 
> > > + * @size:Size of the IPA range to annotate.
> > 
> >   "Size of the annotated range"
> > 
> > > + * @mc:  Cache of pre-allocated and zeroed memory from which to 
> > > allocate
> > > + *   page-table pages.
> > > + * @owner_id:Unique identifier for the owner of the page.
> > > + *
> > > + * The page-table owner has identifier 0.
> > 
> > Perhaps, "By default, all page-tables are owned by identifier 0"
> 
> Ack all of the above.
> 
> > > + *
> > > + * Return: 0 on success, negative error code on failure.
> > > + */
> > > +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 
> > > size,
> > > +  void *mc, u32 owner_id);
> > 
> > Is there a need for the owner_id to be 32-bit rather than e.g. 16-bit? Just
> > strikes me that it might be difficult to recover these bits in future if we
> > give them out freely now.
> 
> I figured we might want to use identifiers that are stable for the
> lifetime of protected VMs. I wasn't sure using e.g. VMIDs would be a
> better choice here as re-using them will cause a lot of pain for the
> host stage 2 pgtable maintenance.

I'm not saying to use the VMID directly, just that allocating half of the
pte feels a bit OTT given that the state of things after this patch series
is that we're using exactly 1 bit.

> > > @@ -517,28 +543,36 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 
> > > end, u32 level,
> > >   if (!kvm_block_mapping_supported(addr, end, phys, level))
> > >   return -E2BIG;
> > >  
> > > - new = kvm_init_valid_leaf_pte(phys, data->attr, level);
> > > - if (kvm_pte_valid(old)) {
> > > + if (kvm_pte_valid(data->attr))
> > 
> > This feels like a bit of a hack to me: the 'attr' field in stage2_map_data
> > is intended to correspond directly to the lower/upper attributes of the
> > descriptor as per the architecture, so tagging the valid bit in there is
> > pretty grotty. However, I can see the significant advantage in being able
> > to re-use the stage2_map_walker functionality, so about instead of nobbling
> > attr, you set phys to something invalid instead, e.g.:
> > 
> > #define KVM_PHYS_SET_OWNER  (-1ULL)
> 
> That'll confuse kvm_block_mapping_supported() and friends I think, at
> least in their current form. If you _really_ don't like this, maybe we
> could have an extra 

Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-11 Thread Quentin Perret
On Thursday 11 Mar 2021 at 18:38:36 (+), Will Deacon wrote:
> On Wed, Mar 10, 2021 at 05:57:45PM +, Quentin Perret wrote:
> > As the host stage 2 will be identity mapped, all the .hyp memory regions
> > and/or memory pages donated to protected guestis will have to marked
> > invalid in the host stage 2 page-table. At the same time, the hypervisor
> > will need a way to track the ownership of each physical page to ensure
> > memory sharing or donation between entities (host, guests, hypervisor) is
> > legal.
> > 
> > In order to enable this tracking at EL2, let's use the host stage 2
> > page-table itself. The idea is to use the top bits of invalid mappings
> > to store the unique identifier of the page owner. The page-table owner
> > (the host) gets identifier 0 such that, at boot time, it owns the entire
> > IPA space as the pgd starts zeroed.
> > 
> > Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> > ownership of pages in the host stage 2. It re-uses most of the map()
> > logic, but ends up creating invalid mappings instead. This impacts
> > how we do refcount as we now need to count invalid mappings when they
> > are used for ownership tracking.
> > 
> > Signed-off-by: Quentin Perret 
> > ---
> >  arch/arm64/include/asm/kvm_pgtable.h | 21 +++
> >  arch/arm64/kvm/hyp/pgtable.c | 92 
> >  2 files changed, 101 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> > b/arch/arm64/include/asm/kvm_pgtable.h
> > index 4ae19247837b..b09af4612656 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, 
> > u64 addr, u64 size,
> >u64 phys, enum kvm_pgtable_prot prot,
> >void *mc);
> >  
> > +/**
> > + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
> > + * encoding the ownership of a page in the
> > + * IPA space.
> > + * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
> > + * @addr:  Intermediate physical address at which to place the annotation.
> 
> This confused me a bit, as the annotation is stored in the page-table, not
> at the memory identified by @addr. How about:
> 
>   "Base intermediate physical address to annotate"
> 
> > + * @size:  Size of the IPA range to annotate.
> 
>   "Size of the annotated range"
> 
> > + * @mc:Cache of pre-allocated and zeroed memory from which to 
> > allocate
> > + * page-table pages.
> > + * @owner_id:  Unique identifier for the owner of the page.
> > + *
> > + * The page-table owner has identifier 0.
> 
> Perhaps, "By default, all page-tables are owned by identifier 0"

Ack all of the above.

> > + *
> > + * Return: 0 on success, negative error code on failure.
> > + */
> > +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 
> > size,
> > +void *mc, u32 owner_id);
> 
> Is there a need for the owner_id to be 32-bit rather than e.g. 16-bit? Just
> strikes me that it might be difficult to recover these bits in future if we
> give them out freely now.

I figured we might want to use identifiers that are stable for the
lifetime of protected VMs. I wasn't sure using e.g. VMIDs would be a
better choice here as re-using them will cause a lot of pain for the
host stage 2 pgtable maintenance.

> > +
> >  /**
> >   * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
> > page-table.
> >   * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
> > diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> > index f37b4179b880..e4670b639726 100644
> > --- a/arch/arm64/kvm/hyp/pgtable.c
> > +++ b/arch/arm64/kvm/hyp/pgtable.c
> > @@ -48,6 +48,8 @@
> >  KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
> >  KVM_PTE_LEAF_ATTR_HI_S2_XN)
> >  
> > +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 32)
> 
> Ah, so that '02' earlier was a typo for '32'.
> 
> > +
> >  struct kvm_pgtable_walk_data {
> > struct kvm_pgtable  *pgt;
> > struct kvm_pgtable_walker   *walker;
> > @@ -186,6 +188,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, 
> > kvm_pte_t attr, u32 level)
> > return pte;
> >  }
> >  
> > +static kvm_pte_t kvm_init_invalid_leaf_owner(u32 owner_id)
> > +{
> > +   return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> > +}
> > +
> >  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 
> > addr,
> >   u32 level, kvm_pte_t *ptep,
> >   enum kvm_pgtable_walk_flags flag)
> > @@ -440,6 +447,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
> >  struct stage2_map_data {
> > u64 phys;
>

Re: [PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-11 Thread Will Deacon
On Wed, Mar 10, 2021 at 05:57:45PM +, Quentin Perret wrote:
> As the host stage 2 will be identity mapped, all the .hyp memory regions
> and/or memory pages donated to protected guestis will have to marked
> invalid in the host stage 2 page-table. At the same time, the hypervisor
> will need a way to track the ownership of each physical page to ensure
> memory sharing or donation between entities (host, guests, hypervisor) is
> legal.
> 
> In order to enable this tracking at EL2, let's use the host stage 2
> page-table itself. The idea is to use the top bits of invalid mappings
> to store the unique identifier of the page owner. The page-table owner
> (the host) gets identifier 0 such that, at boot time, it owns the entire
> IPA space as the pgd starts zeroed.
> 
> Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> ownership of pages in the host stage 2. It re-uses most of the map()
> logic, but ends up creating invalid mappings instead. This impacts
> how we do refcount as we now need to count invalid mappings when they
> are used for ownership tracking.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h | 21 +++
>  arch/arm64/kvm/hyp/pgtable.c | 92 
>  2 files changed, 101 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> b/arch/arm64/include/asm/kvm_pgtable.h
> index 4ae19247837b..b09af4612656 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
> addr, u64 size,
>  u64 phys, enum kvm_pgtable_prot prot,
>  void *mc);
>  
> +/**
> + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
> + *   encoding the ownership of a page in the
> + *   IPA space.
> + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> + * @addr:Intermediate physical address at which to place the annotation.

This confused me a bit, as the annotation is stored in the page-table, not
at the memory identified by @addr. How about:

  "Base intermediate physical address to annotate"

> + * @size:Size of the IPA range to annotate.

  "Size of the annotated range"

> + * @mc:  Cache of pre-allocated and zeroed memory from which to 
> allocate
> + *   page-table pages.
> + * @owner_id:Unique identifier for the owner of the page.
> + *
> + * The page-table owner has identifier 0.

Perhaps, "By default, all page-tables are owned by identifier 0"

> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
> +  void *mc, u32 owner_id);

Is there a need for the owner_id to be 32-bit rather than e.g. 16-bit? Just
strikes me that it might be difficult to recover these bits in future if we
give them out freely now.

> +
>  /**
>   * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
> page-table.
>   * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index f37b4179b880..e4670b639726 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -48,6 +48,8 @@
>KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
>KVM_PTE_LEAF_ATTR_HI_S2_XN)
>  
> +#define KVM_INVALID_PTE_OWNER_MASK   GENMASK(63, 32)

Ah, so that '02' earlier was a typo for '32'.

> +
>  struct kvm_pgtable_walk_data {
>   struct kvm_pgtable  *pgt;
>   struct kvm_pgtable_walker   *walker;
> @@ -186,6 +188,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, 
> kvm_pte_t attr, u32 level)
>   return pte;
>  }
>  
> +static kvm_pte_t kvm_init_invalid_leaf_owner(u32 owner_id)
> +{
> + return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
> +}
> +
>  static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 
> addr,
> u32 level, kvm_pte_t *ptep,
> enum kvm_pgtable_walk_flags flag)
> @@ -440,6 +447,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
>  struct stage2_map_data {
>   u64 phys;
>   kvm_pte_t   attr;
> + u32 owner_id;
>  
>   kvm_pte_t   *anchor;
>   kvm_pte_t   *childp;
> @@ -506,6 +514,24 @@ static int stage2_map_set_prot_attr(enum 
> kvm_pgtable_prot prot,
>   return 0;
>  }
>  
> +static bool stage2_is_permission_change(kvm_pte_t old, kvm_pte_t new)
> +{
> + if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
> + return false;
> +
> + return !((ol

[PATCH v4 28/34] KVM: arm64: Use page-table to track page ownership

2021-03-10 Thread Quentin Perret
As the host stage 2 will be identity mapped, all the .hyp memory regions
and/or memory pages donated to protected guestis will have to marked
invalid in the host stage 2 page-table. At the same time, the hypervisor
will need a way to track the ownership of each physical page to ensure
memory sharing or donation between entities (host, guests, hypervisor) is
legal.

In order to enable this tracking at EL2, let's use the host stage 2
page-table itself. The idea is to use the top bits of invalid mappings
to store the unique identifier of the page owner. The page-table owner
(the host) gets identifier 0 such that, at boot time, it owns the entire
IPA space as the pgd starts zeroed.

Provide kvm_pgtable_stage2_set_owner() which allows to modify the
ownership of pages in the host stage 2. It re-uses most of the map()
logic, but ends up creating invalid mappings instead. This impacts
how we do refcount as we now need to count invalid mappings when they
are used for ownership tracking.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h | 21 +++
 arch/arm64/kvm/hyp/pgtable.c | 92 
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 4ae19247837b..b09af4612656 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
addr, u64 size,
   u64 phys, enum kvm_pgtable_prot prot,
   void *mc);
 
+/**
+ * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
+ * encoding the ownership of a page in the
+ * IPA space.
+ * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:  Intermediate physical address at which to place the annotation.
+ * @size:  Size of the IPA range to annotate.
+ * @mc:Cache of pre-allocated and zeroed memory from which to 
allocate
+ * page-table pages.
+ * @owner_id:  Unique identifier for the owner of the page.
+ *
+ * The page-table owner has identifier 0. This function can be used to mark
+ * portions of the IPA space as owned by other entities. When a stage 2 is used
+ * with identity-mappings, these annotations allow to use the page-table data
+ * structure as a simple rmap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
+void *mc, u32 owner_id);
+
 /**
  * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
page-table.
  * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f37b4179b880..e4670b639726 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -48,6 +48,8 @@
 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 32)
+
 struct kvm_pgtable_walk_data {
struct kvm_pgtable  *pgt;
struct kvm_pgtable_walker   *walker;
@@ -186,6 +188,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t 
attr, u32 level)
return pte;
 }
 
+static kvm_pte_t kvm_init_invalid_leaf_owner(u32 owner_id)
+{
+   return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
+}
+
 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
  u32 level, kvm_pte_t *ptep,
  enum kvm_pgtable_walk_flags flag)
@@ -440,6 +447,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 struct stage2_map_data {
u64 phys;
kvm_pte_t   attr;
+   u32 owner_id;
 
kvm_pte_t   *anchor;
kvm_pte_t   *childp;
@@ -506,6 +514,24 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot 
prot,
return 0;
 }
 
+static bool stage2_is_permission_change(kvm_pte_t old, kvm_pte_t new)
+{
+   if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
+   return false;
+
+   return !((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
+}
+
+static bool stage2_pte_is_counted(kvm_pte_t pte)
+{
+   /*
+* The refcount tracks valid entries as well as invalid entries if they
+* encode ownership of a page to another entity than the page-table
+* owner, whose id is 0.
+*/
+   return !!pte;
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
  kvm_pte_t *ptep,