Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-29 Thread Chao Peng
On Fri, Aug 26, 2022 at 04:19:43PM +0100, Fuad Tabba wrote:
> > +bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
> > +{
> > +   return false;
> > +}
> > +
> >  static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> >  {
> > u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> > @@ -4689,6 +4729,22 @@ static long kvm_vm_ioctl(struct file *filp,
> > r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> > break;
> > }
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +   case KVM_MEMORY_ENCRYPT_REG_REGION:
> > +   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > +   struct kvm_enc_region region;
> > +
> > +   if (!kvm_arch_private_mem_supported(kvm))
> > +   goto arch_vm_ioctl;
> > +
> > +   r = -EFAULT;
> > +   if (copy_from_user(&region, argp, sizeof(region)))
> > +   goto out;
> > +
> > +   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);
> > +   break;
> > +   }
> > +#endif
> > case KVM_GET_DIRTY_LOG: {
> > struct kvm_dirty_log log;
> >
> > @@ -4842,6 +4898,7 @@ static long kvm_vm_ioctl(struct file *filp,
> > r = kvm_vm_ioctl_get_stats_fd(kvm);
> > break;
> > default:
> > +arch_vm_ioctl:
> 
> It might be good to make this label conditional on
> CONFIG_HAVE_KVM_PRIVATE_MEM, otherwise you get a warning if
> CONFIG_HAVE_KVM_PRIVATE_MEM isn't defined.
> 
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
>  arch_vm_ioctl:
> +#endif

Right, as the bot already complains.

Chao
> 
> Cheers,
> /fuad
> 
> > r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> > }
> >  out:
> > --
> > 2.25.1
> >
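
For reference, a minimal sketch of how Fuad's guarded label would read in
kvm_vm_ioctl()'s default case, combining the hunks above (a sketch, not the
posted code):

        default:
#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
arch_vm_ioctl:
#endif
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
        }
out:
        return r;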



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-26 Thread Fuad Tabba
Hi Chao,

On Wed, Jul 6, 2022 at 9:27 AM Chao Peng  wrote:
>
> If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
> guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
> > ioctls. The patch reuses the existing SEV ioctls but differs in that the
> > address in the region for private memory is a GPA, while in the SEV case it's an HVA.
>
> > The private memory regions are stored in an xarray in KVM for memory
> > efficiency in normal usages, and zapping existing memory mappings is also
> > a side effect of these two ioctls.
>
> Signed-off-by: Chao Peng 
> ---
>  Documentation/virt/kvm/api.rst  | 17 +++---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/Kconfig|  1 +
>  arch/x86/kvm/mmu.h  |  2 --
>  include/linux/kvm_host.h|  8 +
>  virt/kvm/kvm_main.c | 57 +
>  6 files changed, 80 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 5ecfc7fbe0ee..dfb4caecab73 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4715,10 +4715,19 @@ Documentation/virt/kvm/amd-memory-encryption.rst.
>  This ioctl can be used to register a guest memory region which may
>  contain encrypted data (e.g. guest RAM, SMRAM etc).
>
> -It is used in the SEV-enabled guest. When encryption is enabled, a guest
> -memory region may contain encrypted data. The SEV memory encryption
> -engine uses a tweak such that two identical plaintext pages, each at
> -different locations will have differing ciphertexts. So swapping or
> +Currently this ioctl supports registering memory regions for two usages:
> +private memory and SEV-encrypted memory.
> +
> +When private memory is enabled, this ioctl is used to register a guest private
> +memory region, and the addr/size of kvm_enc_region represents a guest physical
> +address (GPA). In this usage, this ioctl zaps the existing guest memory
> +mappings in KVM that fall into the region.
> +
> +When SEV-encrypted memory is enabled, this ioctl is used to register a guest
> +memory region which may contain encrypted data for a SEV-enabled guest. The
> +addr/size of kvm_enc_region represents a userspace address (HVA). The SEV
> +memory encryption engine uses a tweak such that two identical plaintext pages,
> +each at different locations will have differing ciphertexts. So swapping or
>  moving ciphertext of those pages will not result in plaintext being
>  swapped. So relocating (or migrating) physical backing pages for the SEV
>  guest will require some additional steps.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index dae190e19fce..92120e3a224e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -37,6 +37,7 @@
>  #include 
>
>  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ZAP_GFN_RANGE
>
>  #define KVM_MAX_VCPUS 1024
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 1f160801e2a7..05861b9656a4 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -50,6 +50,7 @@ config KVM
> select HAVE_KVM_PM_NOTIFIER if PM
> select HAVE_KVM_PRIVATE_MEM if X86_64
> select MEMFILE_NOTIFIER if HAVE_KVM_PRIVATE_MEM
> +   select XARRAY_MULTI if HAVE_KVM_PRIVATE_MEM
> help
>   Support hosting fully virtualized guest machines using hardware
>   virtualization extensions.  You will need a fairly recent
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index a99acec925eb..428cd2e88cbd 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -209,8 +209,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> return -(u32)fault & errcode;
>  }
>
> -void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> -
>  int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
>
>  int kvm_mmu_post_init_vm(struct kvm *kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1b203c8aa696..da33f8828456 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -260,6 +260,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  #endif
>
> +#ifdef __KVM_HAVE_ZAP_GFN_RANGE
> +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> +#endif
> +
>  enum {
> OUTSIDE_GUEST_MODE,
> IN_GUEST_MODE,
> @@ -795,6 +799,9 @@ struct kvm {
> struct notifier_block pm_notifier;
>  #endif
> char stats_id[KVM_STATS_NAME_SIZE];
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +   struct xarray mem_attr_array;
> +#endif
>  };
>
>  #define kvm_err(fmt, ...) \
> @@ -1459,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_post_init_vm(struct kvm *kvm);
>  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
>  int 
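
To make the GPA-vs-HVA distinction in the api.rst hunk concrete, a userspace
caller registering a private region might look like the sketch below; vm_fd
and the addresses are illustrative assumptions, not taken from the series:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Mark a 2 MiB guest-physical range as private on a VM fd obtained
 * from KVM_CREATE_VM; for SEV the same struct would carry an HVA. */
static int set_private_region(int vm_fd)
{
        struct kvm_enc_region region = {
                .addr = 0x100000000ULL, /* GPA in the private-memory usage */
                .size = 0x200000ULL,    /* must be page-aligned */
        };

        if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region) < 0) {
                perror("KVM_MEMORY_ENCRYPT_REG_REGION");
                return -1;
        }
        return 0;
}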

Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-24 Thread Chao Peng
On Fri, Aug 19, 2022 at 12:37:42PM -0700, Vishal Annapurve wrote:
> > ...
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 230c8ff9659c..bb714c2a4b06 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -914,6 +914,35 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
> >
> >  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
> >
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +#define KVM_MEM_ATTR_PRIVATE   0x0001
> > +static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl,
> > +struct kvm_enc_region *region)
> > +{
> > +   unsigned long start, end;
> > +   void *entry;
> > +   int r;
> > +
> > +   if (region->size == 0 || region->addr + region->size < region->addr)
> > +   return -EINVAL;
> > +   if (region->addr & (PAGE_SIZE - 1) || region->size & (PAGE_SIZE - 1))
> > +   return -EINVAL;
> > +
> > +   start = region->addr >> PAGE_SHIFT;
> > +   end = (region->addr + region->size - 1) >> PAGE_SHIFT;
> > +
> > +   entry = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION ?
> > +   xa_mk_value(KVM_MEM_ATTR_PRIVATE) : NULL;
> > +
> > +   r = xa_err(xa_store_range(&kvm->mem_attr_array, start, end,
> > +   entry, GFP_KERNEL_ACCOUNT));
> 
> xa_store_range seems to create multi-index entries by default.
> Subsequent xa_store_range call changes all the entries stored
> previously.

By using xa_store_range and storing them as multi-index entries, I
originally expected to save some memory for contiguous pages.

But sounds like the current multi-index store behaviour isn't quite
ready for our usage.

Chao
> xa_store needs to be used here instead of xa_store_range to achieve
> the intended behavior.
> 
> > +
> > +   kvm_zap_gfn_range(kvm, start, end + 1);
> > +
> > +   return r;
> > +}
> > +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> > +
> > ...
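
For illustration, a per-index store along the lines Vishal suggests could
replace the xa_store_range() call; this is a sketch against the patch's own
names, not posted code:

        gfn_t i;
        int r = 0;

        /* One xarray entry per gfn, so a later store to part of the range
         * does not rewrite a previously stored multi-index entry. */
        for (i = start; i <= end; i++) {
                r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
                                    GFP_KERNEL_ACCOUNT));
                if (r)
                        break;
        }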



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-19 Thread Vishal Annapurve
> ...
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 230c8ff9659c..bb714c2a4b06 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -914,6 +914,35 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +#define KVM_MEM_ATTR_PRIVATE   0x0001
> +static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl,
> +struct kvm_enc_region *region)
> +{
> +   unsigned long start, end;
> +   void *entry;
> +   int r;
> +
> +   if (region->size == 0 || region->addr + region->size < region->addr)
> +   return -EINVAL;
> +   if (region->addr & (PAGE_SIZE - 1) || region->size & (PAGE_SIZE - 1))
> +   return -EINVAL;
> +
> +   start = region->addr >> PAGE_SHIFT;
> +   end = (region->addr + region->size - 1) >> PAGE_SHIFT;
> +
> +   entry = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION ?
> +   xa_mk_value(KVM_MEM_ATTR_PRIVATE) : NULL;
> +
> +   r = xa_err(xa_store_range(&kvm->mem_attr_array, start, end,
> +   entry, GFP_KERNEL_ACCOUNT));

xa_store_range seems to create multi-index entries by default.
Subsequent xa_store_range call changes all the entries stored
previously.
xa_store needs to be used here instead of xa_store_range to achieve
the intended behavior.

> +
> +   kvm_zap_gfn_range(kvm, start, end + 1);
> +
> +   return r;
> +}
> +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
> ...
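
As an aside, the open-coded alignment checks in the helper above could
equivalently use the kernel's PAGE_ALIGNED() helper from <linux/mm.h>; a
sketch:

        if (region->size == 0 || region->addr + region->size < region->addr)
                return -EINVAL;
        if (!PAGE_ALIGNED(region->addr) || !PAGE_ALIGNED(region->size))
                return -EINVAL;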



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-04 Thread Chao Peng
On Wed, Aug 03, 2022 at 03:51:24PM +, Sean Christopherson wrote:
> On Wed, Aug 03, 2022, Chao Peng wrote:
> > On Tue, Aug 02, 2022 at 04:38:55PM +, Sean Christopherson wrote:
> > > On Tue, Aug 02, 2022, Sean Christopherson wrote:
> > > > I think we should avoid UNMAPPABLE even on the KVM side of things for 
> > > > the core
> > > > memslots functionality and instead be very literal, e.g.
> > > > 
> > > > KVM_HAS_FD_BASED_MEMSLOTS
> > > > KVM_MEM_FD_VALID
> > > > 
> > > > We'll still need KVM_HAS_USER_UNMAPPABLE_MEMORY, but it won't be tied 
> > > > directly to
> > > > the memslot.  Decoupling the two things will require a bit of extra 
> > > > work, but the
> > > > code impact should be quite small, e.g. explicitly query and propagate
> > > > MEMFILE_F_USER_INACCESSIBLE to kvm_memory_slot to track if a memslot 
> > > > can be private.
> > > > And unless I'm missing something, it won't require an additional 
> > > > memslot flag.
> > > > The biggest oddity (if we don't also add KVM_MEM_PRIVATE) is that KVM 
> > > > would
> > > > effectively ignore the hva for fd-based memslots for VM types that 
> > > > don't support
> > > > private memory, i.e. userspace can't opt out of using the fd-based 
> > > > backing, but that
> > > > doesn't seem like a deal breaker.
> > 
> > I actually love this idea. I don't mind adding extra code for potential
> > usage other than confidential VMs if we can have a workable solution for
> > it.
> > 
> > > 
> > > Hrm, but basing private memory on top of a generic FD_VALID would 
> > > effectively require
> > > shared memory to use hva-based memslots for confidential VMs.  That'd 
> > > yield a very
> > > weird API, e.g. non-confidential VMs could be backed entirely by fd-based 
> > > memslots,
> > > but confidential VMs would be forced to use hva-based memslots.
> > 
> > It would work if we can treat userspace_addr as optional for
> > KVM_MEM_FD_VALID, e.g. userspace can opt in to decide whether it needs
> > the mappable part or not for a regular VM, and we can enforce it in KVM for
> > confidential VMs. But the u64 type of userspace_addr doesn't allow us to
> > express a 'null' value so sounds like we will end up needing another
> > flag anyway.
> > 
> > In concept, we could have three configurations here:
> >   1. hva-only: without any flag and use userspace_addr;
> >   2. fd-only:  another new flag is needed and use fd/offset;
> >   3. hva/fd mixed: both userspace_addr and fd/offset is effective.
> >  KVM_MEM_PRIVATE is a subset of it for confidential VMs. Not sure
> >  regular VM also wants this.
> 
> My mental model breaks things down slightly differently, though the end 
> result is
> more or less the same. 
> 
> After this series, there will be two types of memory: private and "regular" 
> (I'm
> trying to avoid "shared").  "Regular" memory is always hva-based 
> (userspace_addr),
> and private always fd-based (fd+offset).
> 
> In the future, if we want to support fd-based memory for "regular" memory, 
> then
> as you said we'd need to add a new flag, and a new fd+offset pair.
> 
> At that point, we'd have two new (relatively to current) flags:
> 
>   KVM_MEM_PRIVATE_FD_VALID
>   KVM_MEM_FD_VALID
> 
> along with two new pairs of fd+offset (private_* and "regular").  Mapping 
> those
> to your above list:

I previously thought we could reuse the private_fd (the name should be
changed) for regular VMs as well, so we'd only need one pair of fd+offset;
the meaning of the fd could be decided by the flag. But introducing two
pairs may support extra usages, like one fd for regular memory and another
private_fd for private memory, though I'm unsure this is a useful
configuration.

>   
>   1.  Neither *_FD_VALID flag set.
>   2a. Both PRIVATE_FD_VALID and FD_VALID are set
>   2b. FD_VALID is set and the VM doesn't support private memory
>   3.  Only PRIVATE_FD_VALID is set (which requires private memory support in the VM).
> 
> Thus, "regular" VMs can't have a mix in a single memslot because they can't 
> use
> private memory.
> 
> > There is no direct relationship between unmappable and fd-based since
> > even fd-based can also be mappable for regular VM?

Hmm, yes, for private memory we have special treatment in the page fault
handler, and that is not applied to regular VMs.

> 
> Yep.
> 
> > > Ignore this idea for now.  If there's an actual use case for generic 
> > > fd-based memory
> > > then we'll want a separate flag, fd, and offset, i.e. that support could 
> > > be added
> > > independent of KVM_MEM_PRIVATE.
> > 
> > If we ignore this idea for now (which I'm also fine with), do you still think we
> > need to change KVM_MEM_PRIVATE to KVM_MEM_USER_UNMAPPABLE?
> 
> Hmm, no.  After working through this, I think it's safe to say 
> KVM_MEM_USER_UNMAPPABLE
> is a bad name because we could end up with "regular" memory that's backed by an
> inaccessible (unmappable) file.
> 
> One alternative would be to call it KVM_MEM_PROTECTED.  That shouldn't cause
> problems for the known use of "private" 

Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-03 Thread Sean Christopherson
On Wed, Aug 03, 2022, Chao Peng wrote:
> On Tue, Aug 02, 2022 at 04:38:55PM +, Sean Christopherson wrote:
> > On Tue, Aug 02, 2022, Sean Christopherson wrote:
> > > I think we should avoid UNMAPPABLE even on the KVM side of things for the 
> > > core
> > > memslots functionality and instead be very literal, e.g.
> > > 
> > >   KVM_HAS_FD_BASED_MEMSLOTS
> > >   KVM_MEM_FD_VALID
> > > 
> > > We'll still need KVM_HAS_USER_UNMAPPABLE_MEMORY, but it won't be tied 
> > > directly to
> > > the memslot.  Decoupling the two things will require a bit of extra 
> > > work, but the
> > > code impact should be quite small, e.g. explicitly query and propagate
> > > MEMFILE_F_USER_INACCESSIBLE to kvm_memory_slot to track if a memslot can 
> > > be private.
> > > And unless I'm missing something, it won't require an additional memslot 
> > > flag.
> > > The biggest oddity (if we don't also add KVM_MEM_PRIVATE) is that KVM 
> > > would
> > > effectively ignore the hva for fd-based memslots for VM types that don't 
> > > support
> > > private memory, i.e. userspace can't opt out of using the fd-based 
> > > backing, but that
> > > doesn't seem like a deal breaker.
> 
> I actually love this idea. I don't mind adding extra code for potential
> usage other than confidential VMs if we can have a workable solution for
> it.
> 
> > 
> > Hrm, but basing private memory on top of a generic FD_VALID would 
> > effectively require
> > shared memory to use hva-based memslots for confidential VMs.  That'd yield 
> > a very
> > weird API, e.g. non-confidential VMs could be backed entirely by fd-based 
> > memslots,
> > but confidential VMs would be forced to use hva-based memslots.
> 
> It would work if we can treat userspace_addr as optional for
> KVM_MEM_FD_VALID, e.g. userspace can opt in to decide whether it needs
> the mappable part or not for a regular VM, and we can enforce it in KVM for
> confidential VMs. But the u64 type of userspace_addr doesn't allow us to
> express a 'null' value so sounds like we will end up needing another
> flag anyway.
> 
> In concept, we could have three configurations here:
>   1. hva-only: without any flag and use userspace_addr;
>   2. fd-only:  another new flag is needed and use fd/offset;
>   3. hva/fd mixed: both userspace_addr and fd/offset is effective.
>  KVM_MEM_PRIVATE is a subset of it for confidential VMs. Not sure
>  regular VM also wants this.

My mental model breaks things down slightly differently, though the end result 
is
more or less the same. 

After this series, there will be two types of memory: private and "regular" (I'm
trying to avoid "shared").  "Regular" memory is always hva-based 
(userspace_addr),
and private always fd-based (fd+offset).

In the future, if we want to support fd-based memory for "regular" memory, then
as you said we'd need to add a new flag, and a new fd+offset pair.

At that point, we'd have two new (relatively to current) flags:

  KVM_MEM_PRIVATE_FD_VALID
  KVM_MEM_FD_VALID

along with two new pairs of fd+offset (private_* and "regular").  Mapping those
to your above list:
  
  1.  Neither *_FD_VALID flag set.
  2a. Both PRIVATE_FD_VALID and FD_VALID are set
  2b. FD_VALID is set and the VM doesn't support private memory
  3.  Only PRIVATE_FD_VALID is set (which requires private memory support in the VM).

Thus, "regular" VMs can't have a mix in a single memslot because they can't use
private memory.

> There is no direct relationship between unmappable and fd-based since
> even fd-based can also be mappable for regular VM?

Yep.

> > Ignore this idea for now.  If there's an actual use case for generic 
> > fd-based memory
> > then we'll want a separate flag, fd, and offset, i.e. that support could be 
> > added
> > independent of KVM_MEM_PRIVATE.
> 
> If we ignore this idea for now (which I'm also fine with), do you still think we
> need to change KVM_MEM_PRIVATE to KVM_MEM_USER_UNMAPPABLE?

Hmm, no.  After working through this, I think it's safe to say 
KVM_MEM_USER_UNMAPPABLE
is a bad name because we could end up with "regular" memory that's backed by an
inaccessible (unmappable) file.

One alternative would be to call it KVM_MEM_PROTECTED.  That shouldn't cause
problems for the known use of "private" (TDX and SNP), and it gives us a little
wiggle room, e.g. if we ever get a use case where VMs can share memory that is
otherwise protected.

That's a pretty big "if" though, and odds are good we'd need more memslot flags 
and
fd+offset pairs to allow differentiating "private" vs. "protected-shared" 
without
forcing userspace to punch holes in memslots, so I don't know that hedging now 
will
buy us anything.

So I'd say that if people think KVM_MEM_PRIVATE brings additional and meaningful
clarity over KVM_MEM_PROTECTED, then let's go with PRIVATE.  But if PROTECTED is
just as good, go with PROTECTED as it gives us a wee bit of wiggle room for the
future.

Note, regardless of what name we settle on, I think it makes sense to do the
KVM_PRIVATE_MEM_SLOTS => KVM_INTERNAL_MEM_SLOTS 

Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-03 Thread Chao Peng
On Tue, Aug 02, 2022 at 04:38:55PM +, Sean Christopherson wrote:
> On Tue, Aug 02, 2022, Sean Christopherson wrote:
> > I think we should avoid UNMAPPABLE even on the KVM side of things for the 
> > core
> > memslots functionality and instead be very literal, e.g.
> > 
> > KVM_HAS_FD_BASED_MEMSLOTS
> > KVM_MEM_FD_VALID
> > 
> > We'll still need KVM_HAS_USER_UNMAPPABLE_MEMORY, but it won't be tied 
> > directly to
> > the memslot.  Decoupling the two things will require a bit of extra work, 
> > but the
> > code impact should be quite small, e.g. explicitly query and propagate
> > MEMFILE_F_USER_INACCESSIBLE to kvm_memory_slot to track if a memslot can be 
> > private.
> > And unless I'm missing something, it won't require an additional memslot 
> > flag.
> > The biggest oddity (if we don't also add KVM_MEM_PRIVATE) is that KVM would
> > effectively ignore the hva for fd-based memslots for VM types that don't 
> > support
> > private memory, i.e. userspace can't opt out of using the fd-based backing, 
> > but that
> > doesn't seem like a deal breaker.

I actually love this idea. I don't mind adding extra code for potential
usage other than confidential VMs if we can have a workable solution for
it.

> 
> Hrm, but basing private memory on top of a generic FD_VALID would effectively 
> require
> shared memory to use hva-based memslots for confidential VMs.  That'd yield a 
> very
> weird API, e.g. non-confidential VMs could be backed entirely by fd-based 
> memslots,
> but confidential VMs would be forced to use hva-based memslots.

It would work if we can treat userspace_addr as optional for
KVM_MEM_FD_VALID, e.g. userspace can opt in to decide whether it needs
the mappable part or not for a regular VM, and we can enforce it in KVM for
confidential VMs. But the u64 type of userspace_addr doesn't allow us to
express a 'null' value so sounds like we will end up needing another
flag anyway.

In concept, we could have three configurations here:
  1. hva-only: without any flag and use userspace_addr;
  2. fd-only:  another new flag is needed and use fd/offset;
  3. hva/fd mixed: both userspace_addr and fd/offset is effective.
 KVM_MEM_PRIVATE is a subset of it for confidential VMs. Not sure
 regular VM also wants this.

There is no direct relationship between unmappable and fd-based since
even fd-based can also be mappable for regular VM?

> 
> Ignore this idea for now.  If there's an actual use case for generic fd-based 
> memory
> then we'll want a separate flag, fd, and offset, i.e. that support could be 
> added
> independent of KVM_MEM_PRIVATE.

If we ignore this idea for now (which I'm also fine with), do you still think we
need to change KVM_MEM_PRIVATE to KVM_MEM_USER_UNMAPPABLE?

Thanks,
Chao



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-02 Thread Sean Christopherson
On Tue, Aug 02, 2022, Sean Christopherson wrote:
> I think we should avoid UNMAPPABLE even on the KVM side of things for the core
> memslots functionality and instead be very literal, e.g.
> 
>   KVM_HAS_FD_BASED_MEMSLOTS
>   KVM_MEM_FD_VALID
> 
> We'll still need KVM_HAS_USER_UNMAPPABLE_MEMORY, but it won't be tied 
> directly to
> the memslot.  Decoupling the two things will require a bit of extra work, 
> but the
> code impact should be quite small, e.g. explicitly query and propagate
> MEMFILE_F_USER_INACCESSIBLE to kvm_memory_slot to track if a memslot can be 
> private.
> And unless I'm missing something, it won't require an additional memslot flag.
> The biggest oddity (if we don't also add KVM_MEM_PRIVATE) is that KVM would
> effectively ignore the hva for fd-based memslots for VM types that don't 
> support
> private memory, i.e. userspace can't opt out of using the fd-based backing, 
> but that
> doesn't seem like a deal breaker.

Hrm, but basing private memory on top of a generic FD_VALID would effectively 
require
shared memory to use hva-based memslots for confidential VMs.  That'd yield a 
very
weird API, e.g. non-confidential VMs could be backed entirely by fd-based 
memslots,
but confidential VMs would be forced to use hva-based memslots.

Ignore this idea for now.  If there's an actual use case for generic fd-based 
memory
then we'll want a separate flag, fd, and offset, i.e. that support could be 
added
independent of KVM_MEM_PRIVATE.



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-08-01 Thread Sean Christopherson
On Fri, Jul 29, 2022, Sean Christopherson wrote:
> On Mon, Jul 25, 2022, Chao Peng wrote:
> > On Thu, Jul 21, 2022 at 05:58:50PM +, Sean Christopherson wrote:
> > > On Thu, Jul 21, 2022, Chao Peng wrote:
> > > > On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> > > > > 
> > > > > 
> > > > > On 7/21/22 00:21, Sean Christopherson wrote:
> > > > > Maybe you could tag it with cgs for all the confidential guest support
> > > > > related stuff: e.g. kvm_vm_ioctl_set_cgs_mem()
> > > > > 
> > > > > bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > > > > ...
> > > > > kvm_vm_ioctl_set_cgs_mem(&region, is_private)
> > > > 
> > > > If we plan to widely use such abbr. through KVM (e.g. it's well known),
> > > > I'm fine.
> > > 
> > > I'd prefer to stay away from "confidential guest", and away from any 
> > > VM-scoped
> > > name for that matter.  User-unmappable memory has use cases beyond 
> > > hiding guest
> > > state from the host, e.g. userspace could use inaccessible/unmappable 
> > > memory to
> > > harden itself against unintentional access to guest memory.
> > > 
> > > > I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
> > > > But I also don't quite like it, it's so generic and says nothing.
> > > > 
> > > > But I do want a name that can cover future usages other than just 
> > > > private/shared (pKVM for example may have a third state).
> > > 
> > > I don't think there can be a third top-level state.  Memory is either 
> > > private to
> > > the guest or it's not.  There can be sub-states, e.g. memory could be 
> > > selectively
> > > shared or encrypted with a different key, in which case we'd need 
> > > metadata to
> > > track that state.
> > > 
> > > Though that begs the question of whether or not private_fd is the correct
> > > terminology.  E.g. if guest memory is backed by a memfd that can't be 
> > > mapped by
> > > userspace (currently F_SEAL_INACCESSIBLE), but something else in the 
> > > kernel plugs
> > > that memory into a device or another VM, then arguably that memory is 
> > > shared,
> > > especially the multi-VM scenario.
> > > 
> > > For TDX and SNP "private vs. shared" is likely the correct terminology 
> > > given the
> > > current specs, but for generic KVM it's probably better to align with 
> > > whatever
> > > terminology is used for memfd.  "inaccessible_fd" and 
> > > "user_inaccessible_fd" are
> > > a bit odd since the fd itself is accessible.
> > > 
> > > What about "user_unmappable"?  E.g.
> > > 
> > >   F_SEAL_USER_UNMAPPABLE, MFD_USER_UNMAPPABLE, 
> > > KVM_HAS_USER_UNMAPPABLE_MEMORY,
> > >   MEMFILE_F_USER_INACCESSIBLE, user_unmappable_fd, etc...
> > 
> > For KVM I also think user_unmappable looks better than 'private', e.g.
> > user_unmappable_fd/KVM_HAS_USER_UNMAPPABLE_MEMORY sound like more
> > appropriate names. For memfd however, I don't feel that strongly about changing
> > it from the current 'inaccessible' to 'user_unmappable'; one of the reasons
> > is that it's not just about being unmappable, but actually also inaccessible
> > through direct ioctls like read()/write().
> 
> Heh, I _knew_ there had to be a catch.  I agree that INACCESSIBLE is better 
> for
> memfd.

Thought about this some more...

I think we should avoid UNMAPPABLE even on the KVM side of things for the core
memslots functionality and instead be very literal, e.g.

KVM_HAS_FD_BASED_MEMSLOTS
KVM_MEM_FD_VALID

We'll still need KVM_HAS_USER_UNMAPPABLE_MEMORY, but it won't be tied directly 
to
the memslot.  Decoupling the two things will require a bit of extra work, but 
the
code impact should be quite small, e.g. explicitly query and propagate
MEMFILE_F_USER_INACCESSIBLE to kvm_memory_slot to track if a memslot can be 
private.
And unless I'm missing something, it won't require an additional memslot flag.
The biggest oddity (if we don't also add KVM_MEM_PRIVATE) is that KVM would
effectively ignore the hva for fd-based memslots for VM types that don't support
private memory, i.e. userspace can't opt out of using the fd-based backing, but 
that
doesn't seem like a deal breaker.

Decoupling private memory from fd-based memslots will allow using fd-based 
memslots
for backing VMs even if the memory is user mappable, which opens up potentially
interesting use cases.  It would also allow testing some parts of fd-based 
memslots
with existing VMs.

The big advantage of KVM's hva-based memslots is that KVM doesn't care what's 
backing
a memslot, and so (in theory) enabling new backing stores for KVM is free.  
It's not
always free, but at this point I think we've eliminated most of the hiccups, 
e.g. x86's
MMU should no longer require additional enlightenment to support huge pages for 
new
backing types.

On the flip-side, a big disadvantage of hva-based memslots is that KVM doesn't
_know_ what's backing a memslot.  This is one of the major reasons, if not _the_
main reason at this point, why KVM binds a VM to a single virtual address space.
Running with different hva=>pfn 

Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-29 Thread Sean Christopherson
On Mon, Jul 25, 2022, Chao Peng wrote:
> On Thu, Jul 21, 2022 at 05:58:50PM +, Sean Christopherson wrote:
> > On Thu, Jul 21, 2022, Chao Peng wrote:
> > > On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> > > > 
> > > > 
> > > > On 7/21/22 00:21, Sean Christopherson wrote:
> > > > Maybe you could tag it with cgs for all the confidential guest support
> > > > related stuff: e.g. kvm_vm_ioctl_set_cgs_mem()
> > > > 
> > > > bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > > > ...
> > > > kvm_vm_ioctl_set_cgs_mem(&region, is_private)
> > > 
> > > If we plan to widely use such abbr. through KVM (e.g. it's well known),
> > > I'm fine.
> > 
> > I'd prefer to stay away from "confidential guest", and away from any 
> > VM-scoped
> > name for that matter.  User-unmappable memory has use cases beyond hiding 
> > guest
> > state from the host, e.g. userspace could use inaccessible/unmappable 
> > memory to
> > harden itself against unintentional access to guest memory.
> > 
> > > I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
> > > But I also don't quite like it, it's so generic and says nothing.
> > > 
> > > But I do want a name that can cover future usages other than just 
> > > private/shared (pKVM for example may have a third state).
> > 
> > I don't think there can be a third top-level state.  Memory is either 
> > private to
> > the guest or it's not.  There can be sub-states, e.g. memory could be 
> > selectively
> > shared or encrypted with a different key, in which case we'd need metadata 
> > to
> > track that state.
> > 
> > Though that begs the question of whether or not private_fd is the correct
> > terminology.  E.g. if guest memory is backed by a memfd that can't be 
> > mapped by
> > userspace (currently F_SEAL_INACCESSIBLE), but something else in the kernel 
> > plugs
> > that memory into a device or another VM, then arguably that memory is 
> > shared,
> > especially the multi-VM scenario.
> > 
> > For TDX and SNP "private vs. shared" is likely the correct terminology 
> > given the
> > current specs, but for generic KVM it's probably better to align with 
> > whatever
> > terminology is used for memfd.  "inaccessible_fd" and 
> > "user_inaccessible_fd" are
> > a bit odd since the fd itself is accessible.
> > 
> > What about "user_unmappable"?  E.g.
> > 
> >   F_SEAL_USER_UNMAPPABLE, MFD_USER_UNMAPPABLE, 
> > KVM_HAS_USER_UNMAPPABLE_MEMORY,
> >   MEMFILE_F_USER_INACCESSIBLE, user_unmappable_fd, etc...
> 
> For KVM I also think user_unmappable looks better than 'private', e.g.
> user_unmappable_fd/KVM_HAS_USER_UNMAPPABLE_MEMORY sound like more
> appropriate names. For memfd however, I don't feel that strongly about changing
> it from the current 'inaccessible' to 'user_unmappable'; one of the reasons
> is that it's not just about being unmappable, but actually also inaccessible
> through direct ioctls like read()/write().

Heh, I _knew_ there had to be a catch.  I agree that INACCESSIBLE is better for
memfd.



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-25 Thread Chao Peng
On Thu, Jul 21, 2022 at 05:58:50PM +, Sean Christopherson wrote:
> On Thu, Jul 21, 2022, Chao Peng wrote:
> > On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> > > 
> > > 
> > > On 7/21/22 00:21, Sean Christopherson wrote:
> > > Maybe you could tag it with cgs for all the confidential guest support
> > > related stuff: e.g. kvm_vm_ioctl_set_cgs_mem()
> > > 
> > > bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > > ...
> > > kvm_vm_ioctl_set_cgs_mem(&region, is_private)
> > 
> > If we plan to widely use such abbr. through KVM (e.g. it's well known),
> > I'm fine.
> 
> I'd prefer to stay away from "confidential guest", and away from any VM-scoped
> name for that matter.  User-unmappable memory has use cases beyond hiding 
> guest
> state from the host, e.g. userspace could use inaccessible/unmappable memory 
> to
> harden itself against unintentional access to guest memory.
> 
> > I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
> > But I also don't quite like it, it's so generic and says nothing.
> > 
> > But I do want a name that can cover future usages other than just 
> > private/shared (pKVM for example may have a third state).
> 
> I don't think there can be a third top-level state.  Memory is either private 
> to
> the guest or it's not.  There can be sub-states, e.g. memory could be 
> selectively
> shared or encrypted with a different key, in which case we'd need metadata to
> track that state.
> 
> Though that begs the question of whether or not private_fd is the correct
> terminology.  E.g. if guest memory is backed by a memfd that can't be mapped 
> by
> userspace (currently F_SEAL_INACCESSIBLE), but something else in the kernel 
> plugs
> that memory into a device or another VM, then arguably that memory is shared,
> especially the multi-VM scenario.
> 
> For TDX and SNP "private vs. shared" is likely the correct terminology given 
> the
> current specs, but for generic KVM it's probably better to align with whatever
> terminology is used for memfd.  "inaccessible_fd" and "user_inaccessible_fd" 
> are
> a bit odd since the fd itself is accessible.
> 
> What about "user_unmappable"?  E.g.
> 
>   F_SEAL_USER_UNMAPPABLE, MFD_USER_UNMAPPABLE, KVM_HAS_USER_UNMAPPABLE_MEMORY,
>   MEMFILE_F_USER_INACCESSIBLE, user_unmappable_fd, etc...

For KVM I also think user_unmappable looks better than 'private', e.g.
user_unmappable_fd/KVM_HAS_USER_UNMAPPABLE_MEMORY sound like more
appropriate names. For memfd however, I don't feel that strongly about changing
it from the current 'inaccessible' to 'user_unmappable'; one of the reasons
is that it's not just about being unmappable, but actually also inaccessible
through direct ioctls like read()/write().

> 
> that gives us flexibility to map the memory from within the kernel, e.g. into
> other VMs or devices.
> 
> Hmm, and then keep your original "mem_attr_array" name?  And probably 
> 
>  int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
>  bool is_user_mappable)
> 
> Then the x86/mmu code for TDX/SNP private faults could be:
> 
>   is_private = !kvm_is_gpa_user_mappable();
> 
>   if (fault->is_private != is_private) {
> 
> or if we want to avoid mixing up "user_mappable" and "user_unmappable":
> 
>   is_private = kvm_is_gpa_user_unmappable();
> 
>   if (fault->is_private != is_private) {
> 
> though a helper that returns a negative (not mappable) feels kludgy.  And I 
> like
> kvm_is_gpa_user_mappable() because then when there's not "special" memory, it
> defaults to true, which is more intuitive IMO.

yes.

> 
> And then if the future needs more precision, e.g. user-unmappable memory isn't
> necessarily guest-exclusive, the uAPI names still work even though KVM 
> internals
> will need to be reworked, but that's unavoidable.  E.g. piggybacking
> KVM_MEMORY_ENCRYPT_(UN)REG_REGION doesn't allow for further differentiation,
> so we'd need to _extend_ the uAPI, but the _existing_ uAPI would still be 
> sane.

Right, that has to be extended.

Chao



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-21 Thread Sean Christopherson
On Thu, Jul 21, 2022, Chao Peng wrote:
> On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> > 
> > 
> > On 7/21/22 00:21, Sean Christopherson wrote:
> > Maybe you could tag it with cgs for all the confidential guest support
> > related stuff: e.g. kvm_vm_ioctl_set_cgs_mem()
> > 
> > bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > ...
> > kvm_vm_ioctl_set_cgs_mem(&region, is_private)
> 
> If we plan to widely use such abbr. through KVM (e.g. it's well known),
> I'm fine.

I'd prefer to stay away from "confidential guest", and away from any VM-scoped
name for that matter.  User-unmappable memory has use cases beyond hiding guest
state from the host, e.g. userspace could use inaccessible/unmappable memory to
harden itself against unintentional access to guest memory.

> I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
> But I also don't quite like it, it's so generic and says nothing.
> 
> But I do want a name that can cover future usages other than just 
> private/shared (pKVM for example may have a third state).

I don't think there can be a third top-level state.  Memory is either private to
the guest or it's not.  There can be sub-states, e.g. memory could be 
selectively
shared or encrypted with a different key, in which case we'd need metadata to
track that state.

Though that begs the question of whether or not private_fd is the correct
terminology.  E.g. if guest memory is backed by a memfd that can't be mapped by
userspace (currently F_SEAL_INACCESSIBLE), but something else in the kernel 
plugs
that memory into a device or another VM, then arguably that memory is shared,
especially the multi-VM scenario.

For TDX and SNP "private vs. shared" is likely the correct terminology given the
current specs, but for generic KVM it's probably better to align with whatever
terminology is used for memfd.  "inaccessible_fd" and "user_inaccessible_fd" are
a bit odd since the fd itself is accessible.

What about "user_unmappable"?  E.g.

  F_SEAL_USER_UNMAPPABLE, MFD_USER_UNMAPPABLE, KVM_HAS_USER_UNMAPPABLE_MEMORY,
  MEMFILE_F_USER_INACCESSIBLE, user_unmappable_fd, etc...

that gives us flexibility to map the memory from within the kernel, e.g. into
other VMs or devices.

Hmm, and then keep your original "mem_attr_array" name?  And probably 

 int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
   bool is_user_mappable)

Then the x86/mmu code for TDX/SNP private faults could be:

is_private = !kvm_is_gpa_user_mappable();

if (fault->is_private != is_private) {

or if we want to avoid mixing up "user_mappable" and "user_unmappable":

is_private = kvm_is_gpa_user_unmappable();

if (fault->is_private != is_private) {

though a helper that returns a negative (not mappable) feels kludgy.  And I like
kvm_is_gpa_user_mappable() because then when there's not "special" memory, it
defaults to true, which is more intuitive IMO.

And then if the future needs more precision, e.g. user-unmappable memory isn't
necessarily guest-exclusive, the uAPI names still work even though KVM internals
will need to be reworked, but that's unavoidable.  E.g. piggybacking
KVM_MEMORY_ENCRYPT_(UN)REG_REGION doesn't allow for further differentiation,
so we'd need to _extend_ the uAPI, but the _existing_ uAPI would still be sane.



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-21 Thread Chao Peng
On Wed, Jul 20, 2022 at 04:44:32PM +, Sean Christopherson wrote:
> On Wed, Jul 06, 2022, Chao Peng wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 230c8ff9659c..bb714c2a4b06 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -914,6 +914,35 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
> >  
> >  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
> >  
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +#define KVM_MEM_ATTR_PRIVATE   0x0001
> > +static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl,
> > +struct kvm_enc_region *region)
> > +{
> > +   unsigned long start, end;
> 
> As alluded to in a different reply, because this will track GPAs instead of 
> HVAs,
> the type needs to be "gpa_t", not "unsigned long".  Oh, actually, they need to
> be gfn_t, since those are what gets shoved into the xarray.

It's gfn_t actually. My original purpose for this was so that 32-bit
architectures (if any) could also work with it, since the xarray index is
32-bit on those architectures.  But kvm_enc_region is u64, so it's not even
possible.

> 
> > +   void *entry;
> > +   int r;
> > +
> > +   if (region->size == 0 || region->addr + region->size < region->addr)
> > +   return -EINVAL;
> > +   if (region->addr & (PAGE_SIZE - 1) || region->size & (PAGE_SIZE - 1))
> > +   return -EINVAL;
> > +
> > +   start = region->addr >> PAGE_SHIFT;
> > +   end = (region->addr + region->size - 1) >> PAGE_SHIFT;
> > +
> > +   entry = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION ?
> > +   xa_mk_value(KVM_MEM_ATTR_PRIVATE) : NULL;
> > +
> > +   r = xa_err(xa_store_range(&kvm->mem_attr_array, start, end,
> > +   entry, GFP_KERNEL_ACCOUNT));
> 
> IIUC, this series treats memory as shared by default.  I think we should 
> invert
> that and have KVM's ABI be that all guest memory as private by default, i.e.
> require the guest to opt into sharing memory instead of opt out of sharing 
> memory.
> 
> And then the xarray would track which regions are shared.

Maybe I missed some information discussed elsewhere? I followed
https://lkml.org/lkml/2022/5/23/772. In KVM memory is shared by default, but
userspace should set all guest memory to private before guest launch; the
guest then sees all memory as private.  While defaulting to private also
sounds good, if we only talk about private/shared in the private memory
context (I think so), then there is no ambiguity.

> 
> Regarding mem_attr_array, it probably makes sense to explicitly include what 
> it's
> tracking in the name, i.e. name it {private,shared}_mem_array depending on 
> whether
> it's used to track private vs. shared memory.  If we ever need to track 
> metadata
> beyond shared/private then we can tweak the name as needed, e.g. if hardware 
> ever
> supports secondary non-ephemeral encryption keys.

I think there may be other states beyond that. But I'm fine with only
taking private/shared into consideration, and it also sounds reasonable
for people who want to support other states to change the name then.

Chao



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-21 Thread Chao Peng
On Thu, Jul 21, 2022 at 03:34:59PM +0800, Wei Wang wrote:
> 
> 
> On 7/21/22 00:21, Sean Christopherson wrote:
> > On Wed, Jul 20, 2022, Gupta, Pankaj wrote:
> > > > > > > > +bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
> > Use kvm_arch_has_private_mem(), both because "has" makes it obvious this is 
> > checking
> > a flag of sorts, and to align with other helpers of this nature (and with
> > CONFIG_HAVE_KVM_PRIVATE_MEM).
> > 
> >$ git grep kvm_arch | grep supported | wc -l
> >0
> >$ git grep kvm_arch | grep has | wc -l
> >26

Makes sense. kvm_arch_has_private_mem is actually better.

> > 
> > > > > > > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > > > > > > +   case KVM_MEMORY_ENCRYPT_REG_REGION:
> > > > > > > > +   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > > > > > > > +   struct kvm_enc_region region;
> > > > > > > > +
> > > > > > > > +   if (!kvm_arch_private_mem_supported(kvm))
> > > > > > > > +   goto arch_vm_ioctl;
> > > > > > > > +
> > > > > > > > +   r = -EFAULT;
> > > > > > > > +   if (copy_from_user(&region, argp, sizeof(region)))
> > > > > > > > +   goto out;
> > > > > > > > +
> > > > > > > > +   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);
> > > > > > > this is to store private region metadata not only the encrypted 
> > > > > > > region?
> > > > > > Correct.
> > > > > Sorry for not being clear, was suggesting name change of this 
> > > > > function from:
> > > > > "kvm_vm_ioctl_set_encrypted_region" to 
> > > > > "kvm_vm_ioctl_set_private_region"
> > > > Though I don't have strong reason to change it, I'm fine with this and
> > > Yes, no strong reason, just thought "kvm_vm_ioctl_set_private_region" 
> > > would
> > > depict the actual functionality :)
> > > 
> > > > this name matches the above kvm_arch_private_mem_supported perfectly.
> > > BTW could not understand this, how "kvm_vm_ioctl_set_encrypted_region"
> > > matches "kvm_arch_private_mem_supported"?
> > Chao is saying that kvm_vm_ioctl_set_private_region() pairs nicely with
> > kvm_arch_private_mem_supported(), not that the "encrypted" variant pairs 
> > nicely.
> > 
> > I also like using "private" instead of "encrypted", though we should 
> > probably
> > find a different verb than "set", because calling "set_private" when making 
> > the
> > region shared is confusing.  I'm struggling to come up with a good 
> > alternative
> > though.
> > 
> > kvm_vm_ioctl_set_memory_region() is already taken by 
> > KVM_SET_USER_MEMORY_REGION,
> > and that also means that anything with "memory_region" in the name is bound 
> > to be
> > confusing.
> > 
> > Hmm, and if we move away from "encrypted", it probably makes sense to pass 
> > in
> > addr+size instead of a kvm_enc_region.

This makes sense.

> > 
> > Maybe this?
> > 
> > static int kvm_vm_ioctl_set_or_clear_mem_private(struct kvm *kvm, gpa_t gpa,
> >  gpa_t size, bool set_private)

Currently this should work.

> > 
> > and then:
> > 
> > #ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > case KVM_MEMORY_ENCRYPT_REG_REGION:
> > case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > struct kvm_enc_region region;
> > 
> > if (!kvm_arch_private_mem_supported(kvm))
> > goto arch_vm_ioctl;
> > 
> > r = -EFAULT;
> > if (copy_from_user(&region, argp, sizeof(region)))
> > goto out;
> > 
> > r = kvm_vm_ioctl_set_or_clear_mem_private(kvm, region.addr,
> >   region.size, set);
> > break;
> > }
> > #endif
> > 
> > I don't love it, so if someone has a better idea...
> > 
> Maybe you could tag it with cgs for all the confidential guest support
> related stuff:
> e.g. kvm_vm_ioctl_set_cgs_mem()
> 
> bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> ...
> kvm_vm_ioctl_set_cgs_mem(&region, is_private)

If we plan to widely use such abbr. through KVM (e.g. it's well known),
I'm fine.

I actually use mem_attr in patch: https://lkml.org/lkml/2022/7/20/610
But I also don't quite like it, it's so generic and says nothing.

But I do want a name that can cover future usages other than just 
private/shared (pKVM for example may have a third state).

Thanks,
Chao



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-21 Thread Wei Wang




On 7/21/22 00:21, Sean Christopherson wrote:

On Wed, Jul 20, 2022, Gupta, Pankaj wrote:

+bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)

Use kvm_arch_has_private_mem(), both because "has" makes it obvious this is 
checking
a flag of sorts, and to align with other helpers of this nature (and with
CONFIG_HAVE_KVM_PRIVATE_MEM).

   $ git grep kvm_arch | grep supported | wc -l
   0
   $ git grep kvm_arch | grep has | wc -l
   26


+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   case KVM_MEMORY_ENCRYPT_REG_REGION:
+   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+   struct kvm_enc_region region;
+
+   if (!kvm_arch_private_mem_supported(kvm))
+   goto arch_vm_ioctl;
+
+   r = -EFAULT;
+   if (copy_from_user(&region, argp, sizeof(region)))
+   goto out;
+
+   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);

this is to store private region metadata not only the encrypted region?

Correct.

Sorry for not being clear, was suggesting name change of this function from:
"kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"

Though I don't have strong reason to change it, I'm fine with this and

Yes, no strong reason, just thought "kvm_vm_ioctl_set_private_region" would
depict the actual functionality :)


this name matches the above kvm_arch_private_mem_supported perfectly.

BTW could not understand this, how "kvm_vm_ioctl_set_encrypted_region"
matches "kvm_arch_private_mem_supported"?

Chao is saying that kvm_vm_ioctl_set_private_region() pairs nicely with
kvm_arch_private_mem_supported(), not that the "encrypted" variant pairs nicely.

I also like using "private" instead of "encrypted", though we should probably
find a different verb than "set", because calling "set_private" when making the
region shared is confusing.  I'm struggling to come up with a good alternative
though.

kvm_vm_ioctl_set_memory_region() is already taken by KVM_SET_USER_MEMORY_REGION,
and that also means that anything with "memory_region" in the name is bound to 
be
confusing.

Hmm, and if we move away from "encrypted", it probably makes sense to pass in
addr+size instead of a kvm_enc_region.

Maybe this?

static int kvm_vm_ioctl_set_or_clear_mem_private(struct kvm *kvm, gpa_t gpa,
 gpa_t size, bool set_private)

and then:

#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
case KVM_MEMORY_ENCRYPT_REG_REGION:
case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
struct kvm_enc_region region;

if (!kvm_arch_private_mem_supported(kvm))
goto arch_vm_ioctl;

r = -EFAULT;
if (copy_from_user(&region, argp, sizeof(region)))
goto out;

r = kvm_vm_ioctl_set_or_clear_mem_private(kvm, region.addr,
  region.size, set);
break;
}
#endif

I don't love it, so if someone has a better idea...

Maybe you could tag it with cgs for all the confidential guest support 
related stuff:

e.g. kvm_vm_ioctl_set_cgs_mem()

bool is_private = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
...
kvm_vm_ioctl_set_cgs_mem(&region, is_private)




Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-20 Thread Gupta, Pankaj




Use kvm_arch_has_private_mem(), both because "has" makes it obvious this is 
checking
a flag of sorts, and to align with other helpers of this nature (and with
CONFIG_HAVE_KVM_PRIVATE_MEM).

   $ git grep kvm_arch | grep supported | wc -l
   0
   $ git grep kvm_arch | grep has | wc -l
   26


+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   case KVM_MEMORY_ENCRYPT_REG_REGION:
+   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+   struct kvm_enc_region region;
+
+   if (!kvm_arch_private_mem_supported(kvm))
+   goto arch_vm_ioctl;
+
+   r = -EFAULT;
+   if (copy_from_user(&region, argp, sizeof(region)))
+   goto out;
+
+   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);


this is to store private region metadata not only the encrypted region?


Correct.


Sorry for not being clear, was suggesting name change of this function from:
"kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"


Though I don't have strong reason to change it, I'm fine with this and


Yes, no strong reason, just thought "kvm_vm_ioctl_set_private_region" would
depict the actual functionality :)


this name matches the above kvm_arch_private_mem_supported perfectly.

BTW could not understand this, how "kvm_vm_ioctl_set_encrypted_region"
matches "kvm_arch_private_mem_supported"?


Chao is saying that kvm_vm_ioctl_set_private_region() pairs nicely with
kvm_arch_private_mem_supported(), not that the "encrypted" variant pairs nicely.

I also like using "private" instead of "encrypted", though we should probably
find a different verb than "set", because calling "set_private" when making the
region shared is confusing.  I'm struggling to come up with a good alternative
though.

kvm_vm_ioctl_set_memory_region() is already taken by KVM_SET_USER_MEMORY_REGION,
and that also means that anything with "memory_region" in the name is bound to 
be
confusing.

Hmm, and if we move away from "encrypted", it probably makes sense to pass in
addr+size instead of a kvm_enc_region.

Maybe this?

static int kvm_vm_ioctl_set_or_clear_mem_private(struct kvm *kvm, gpa_t gpa,
 gpa_t size, bool set_private)

and then:

#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
case KVM_MEMORY_ENCRYPT_REG_REGION:
case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
struct kvm_enc_region region;

if (!kvm_arch_private_mem_supported(kvm))
goto arch_vm_ioctl;

r = -EFAULT;
> if (copy_from_user(&region, argp, sizeof(region)))
goto out;

r = kvm_vm_ioctl_set_or_clear_mem_private(kvm, region.addr,
  region.size, set);
break;
}
#endif

I don't love it, so if someone has a better idea...


Both the suggestions look good to me. They bring more clarity.

Thanks,
Pankaj




Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-20 Thread Sean Christopherson
On Wed, Jul 06, 2022, Chao Peng wrote:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 230c8ff9659c..bb714c2a4b06 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -914,6 +914,35 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>  
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>  
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +#define KVM_MEM_ATTR_PRIVATE 0x0001
> +static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl,
> +  struct kvm_enc_region *region)
> +{
> + unsigned long start, end;

As alluded to in a different reply, because this will track GPAs instead of 
HVAs,
the type needs to be "gpa_t", not "unsigned long".  Oh, actually, they need to
be gfn_t, since those are what gets shoved into the xarray.

> + void *entry;
> + int r;
> +
> + if (region->size == 0 || region->addr + region->size < region->addr)
> + return -EINVAL;
> + if (region->addr & (PAGE_SIZE - 1) || region->size & (PAGE_SIZE - 1))
> + return -EINVAL;
> +
> + start = region->addr >> PAGE_SHIFT;
> + end = (region->addr + region->size - 1) >> PAGE_SHIFT;
> +
> + entry = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION ?
> + xa_mk_value(KVM_MEM_ATTR_PRIVATE) : NULL;
> +
> + r = xa_err(xa_store_range(&kvm->mem_attr_array, start, end,
> + entry, GFP_KERNEL_ACCOUNT));

IIUC, this series treats memory as shared by default.  I think we should invert
that and have KVM's ABI be that all guest memory as private by default, i.e.
require the guest to opt into sharing memory instead of opt out of sharing 
memory.

And then the xarray would track which regions are shared.

Regarding mem_attr_array, it probably makes sense to explicitly include what 
it's
tracking in the name, i.e. name it {private,shared}_mem_array depending on 
whether
it's used to track private vs. shared memory.  If we ever need to track metadata
beyond shared/private then we can tweak the name as needed, e.g. if hardware 
ever
supports secondary non-ephemeral encryption keys.
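
For context, a lookup helper over mem_attr_array as posted (private entries
stored, shared as the default) might look like the sketch below; the helper
name is made up for illustration, not taken from the series:

static bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
        void *entry = xa_load(&kvm->mem_attr_array, gfn);

        return xa_is_value(entry) &&
               (xa_to_value(entry) & KVM_MEM_ATTR_PRIVATE);
}

Under the inversion proposed here, the xarray would instead hold the shared
regions and the helper's sense would flip.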



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-20 Thread Sean Christopherson
On Wed, Jul 20, 2022, Gupta, Pankaj wrote:
> 
> > > > > > +bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)

Use kvm_arch_has_private_mem(), both because "has" makes it obvious this is 
checking
a flag of sorts, and to align with other helpers of this nature (and with
CONFIG_HAVE_KVM_PRIVATE_MEM).

  $ git grep kvm_arch | grep supported | wc -l
  0
  $ git grep kvm_arch | grep has | wc -l
  26

> > > > > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > > > > +   case KVM_MEMORY_ENCRYPT_REG_REGION:
> > > > > > +   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > > > > > +   struct kvm_enc_region region;
> > > > > > +
> > > > > > +   if (!kvm_arch_private_mem_supported(kvm))
> > > > > > +   goto arch_vm_ioctl;
> > > > > > +
> > > > > > +   r = -EFAULT;
> > > > > > +   if (copy_from_user(&region, argp, sizeof(region)))
> > > > > > +   goto out;
> > > > > > +
> > > > > > +   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);
> > > > > 
> > > > > this is to store private region metadata not only the encrypted 
> > > > > region?
> > > > 
> > > > Correct.
> > > 
> > > Sorry for not being clear, was suggesting name change of this function 
> > > from:
> > > "kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"
> > 
> > Though I don't have strong reason to change it, I'm fine with this and
> 
> Yes, no strong reason, just thought "kvm_vm_ioctl_set_private_region" would
> depict the actual functionality :)
> 
> > this name matches the above kvm_arch_private_mem_supported perfectly.
> BTW could not understand this, how "kvm_vm_ioctl_set_encrypted_region"
> matches "kvm_arch_private_mem_supported"?

Chao is saying that kvm_vm_ioctl_set_private_region() pairs nicely with
kvm_arch_private_mem_supported(), not that the "encrypted" variant pairs nicely.

I also like using "private" instead of "encrypted", though we should probably
find a different verb than "set", because calling "set_private" when making the
region shared is confusing.  I'm struggling to come up with a good alternative
though.

kvm_vm_ioctl_set_memory_region() is already taken by KVM_SET_USER_MEMORY_REGION,
and that also means that anything with "memory_region" in the name is bound to be
confusing.

Hmm, and if we move away from "encrypted", it probably makes sense to pass in
addr+size instead of a kvm_enc_region.

Maybe this?

static int kvm_vm_ioctl_set_or_clear_mem_private(struct kvm *kvm, gpa_t gpa,
 gpa_t size, bool set_private)

and then:

#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
case KVM_MEMORY_ENCRYPT_REG_REGION:
case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
struct kvm_enc_region region;

if (!kvm_arch_private_mem_supported(kvm))
goto arch_vm_ioctl;

r = -EFAULT;
if (copy_from_user(&region, argp, sizeof(region)))
goto out;

r = kvm_vm_ioctl_set_or_clear_mem_private(kvm, region.addr,
  region.size, set);
break;
}
#endif

I don't love it, so if someone has a better idea...



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-20 Thread Gupta, Pankaj




+bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
+{
+   return false;
+}


Does this function have to be overridden by SEV and TDX to support the private
regions?


Yes it should be overridden by architectures which want to support it.


o.k





+
static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -4689,6 +4729,22 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
break;
}
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   case KVM_MEMORY_ENCRYPT_REG_REGION:
+   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+   struct kvm_enc_region region;
+
+   if (!kvm_arch_private_mem_supported(kvm))
+   goto arch_vm_ioctl;
+
+   r = -EFAULT;
+   if (copy_from_user(&region, argp, sizeof(region)))
+   goto out;
+
+   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);


this is to store private region metadata, not only the encrypted region?


Correct.


Sorry for not being clear; I was suggesting a name change of this function from:
"kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"


Though I don't have strong reason to change it, I'm fine with this and


Yes, no strong reason, just thought "kvm_vm_ioctl_set_private_region" 
would depict the actual functionality :)



this name matches the above kvm_arch_private_mem_supported perfectly.

BTW, I could not understand this: how does "kvm_vm_ioctl_set_encrypted_region"
match "kvm_arch_private_mem_supported"?

Thanks,
Pankaj



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-20 Thread Chao Peng
On Tue, Jul 19, 2022 at 04:23:52PM +0200, Gupta, Pankaj wrote:
> 
> > > > +bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
> > > > +{
> > > > +   return false;
> > > > +}
> > > 
> > > Does this function have to be overridden by SEV and TDX to support the
> > > private regions?
> > 
> > Yes it should be overridden by architectures which want to support it.
> 
> o.k
> > 
> > > 
> > > > +
> > > >    static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> > > >{
> > > > u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> > > > @@ -4689,6 +4729,22 @@ static long kvm_vm_ioctl(struct file *filp,
> > > > r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> > > > break;
> > > > }
> > > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > > +   case KVM_MEMORY_ENCRYPT_REG_REGION:
> > > > +   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > > > +   struct kvm_enc_region region;
> > > > +
> > > > +   if (!kvm_arch_private_mem_supported(kvm))
> > > > +   goto arch_vm_ioctl;
> > > > +
> > > > +   r = -EFAULT;
> > > > +   if (copy_from_user(&region, argp, sizeof(region)))
> > > > +   goto out;
> > > > +
> > > > +   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);
> > > 
> > > this is to store private region metadata not only the encrypted region?
> > 
> > Correct.
> 
> Sorry for not being clear, was suggesting name change of this function from:
> "kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"

Though I don't have a strong reason to change it, I'm fine with this, and
this name matches the above kvm_arch_private_mem_supported perfectly.

Thanks,
Chao
> 
> > 
> > > 
> > > Also, it seems the same ioctl can be used to put other regions (e.g. firmware,
> > > later maybe a DAX backend, etc.) into private memory?
> > 
> > Possibly. It depends on what exactly the semantics is. If you just want to set
> > those regions as private, the current code already supports that.
> 
> Agree. Sure!
> 
> 
> Thanks,
> Pankaj



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-19 Thread Gupta, Pankaj




+bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
+{
+   return false;
+}


Does this function have to be overridden by SEV and TDX to support the private
regions?


Yes it should be overridden by architectures which want to support it.


o.k





+
   static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
   {
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -4689,6 +4729,22 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
break;
}
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   case KVM_MEMORY_ENCRYPT_REG_REGION:
+   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+   struct kvm_enc_region region;
+
+   if (!kvm_arch_private_mem_supported(kvm))
+   goto arch_vm_ioctl;
+
+   r = -EFAULT;
+   if (copy_from_user(&region, argp, sizeof(region)))
+   goto out;
+
+   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);


this is to store private region metadata, not only the encrypted region?


Correct.


Sorry for not being clear; I was suggesting a name change of this function from:
"kvm_vm_ioctl_set_encrypted_region" to "kvm_vm_ioctl_set_private_region"





Also, it seems the same ioctl can be used to put other regions (e.g. firmware,
later maybe a DAX backend, etc.) into private memory?


Possibly. It depends on what exactly the semantics is. If you just want to set
those regions as private, the current code already supports that.


Agree. Sure!


Thanks,
Pankaj



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-19 Thread Chao Peng
On Tue, Jul 19, 2022 at 10:00:23AM +0200, Gupta, Pankaj wrote:

...

> > +bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
> > +{
> > +   return false;
> > +}
> 
> Does this function have to be overridden by SEV and TDX to support the private
> regions?

Yes it should be overridden by architectures which want to support it.
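
For illustration, an override might look roughly like the sketch below; the
vm_type field and KVM_X86_TDX_VM are hypothetical here, not part of this series:

bool kvm_arch_private_mem_supported(struct kvm *kvm)
{
	/* Hypothetical: only VM types with protected guest memory opt in. */
	return kvm->arch.vm_type == KVM_X86_TDX_VM;
}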

> 
> > +
> >   static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> >   {
> > u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> > @@ -4689,6 +4729,22 @@ static long kvm_vm_ioctl(struct file *filp,
> > r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> > break;
> > }
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +   case KVM_MEMORY_ENCRYPT_REG_REGION:
> > +   case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > +   struct kvm_enc_region region;
> > +
> > +   if (!kvm_arch_private_mem_supported(kvm))
> > +   goto arch_vm_ioctl;
> > +
> > +   r = -EFAULT;
> > +   if (copy_from_user(&region, argp, sizeof(region)))
> > +   goto out;
> > +
> > +   r = kvm_vm_ioctl_set_encrypted_region(kvm, ioctl, &region);
> 
> this is to store private region metadata not only the encrypted region?

Correct.

> 
> Also, it seems the same ioctl can be used to put other regions (e.g. firmware,
> later maybe a DAX backend, etc.) into private memory?

Possibly. It depends on what exactly the semantics is. If you just want to set
those regions as private, the current code already supports that.
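
As a userspace sketch of doing exactly that (illustrative only; vm_fd is assumed
to be an open KVM VM fd, and the GPA range is whatever the VMM chose for the
region in question):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Mark a guest-physical range (e.g. where guest firmware lives) private. */
static int set_range_private(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_enc_region region = {
		.addr = gpa,	/* a GPA in the private-memory usage of this ioctl */
		.size = size,
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
}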

Chao
> 
> > +   break;
> > +   }
> > +#endif
> > case KVM_GET_DIRTY_LOG: {
> > struct kvm_dirty_log log;
> > @@ -4842,6 +4898,7 @@ static long kvm_vm_ioctl(struct file *filp,
> > r = kvm_vm_ioctl_get_stats_fd(kvm);
> > break;
> > default:
> > +arch_vm_ioctl:
> > r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> > }
> >   out:
> 



Re: [PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-19 Thread Gupta, Pankaj

Hi Chao,

Some comments below:


If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
ioctls. The patch reuses the existing SEV ioctls but differs in that the
address in the region for private memory is a GPA, while in the SEV case it's an HVA.

The private memory regions are stored as an xarray in KVM for memory
efficiency in normal usages, and zapping existing memory mappings is also
a side effect of these two ioctls.

Signed-off-by: Chao Peng 
---
  Documentation/virt/kvm/api.rst  | 17 +++---
  arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/Kconfig|  1 +
  arch/x86/kvm/mmu.h  |  2 --
  include/linux/kvm_host.h|  8 +
  virt/kvm/kvm_main.c | 57 +
  6 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 5ecfc7fbe0ee..dfb4caecab73 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4715,10 +4715,19 @@ Documentation/virt/kvm/amd-memory-encryption.rst.
  This ioctl can be used to register a guest memory region which may
  contain encrypted data (e.g. guest RAM, SMRAM etc).
  
-It is used in the SEV-enabled guest. When encryption is enabled, a guest
-memory region may contain encrypted data. The SEV memory encryption
-engine uses a tweak such that two identical plaintext pages, each at
-different locations will have differing ciphertexts. So swapping or
+Currently this ioctl supports registering memory regions for two usages:
+private memory and SEV-encrypted memory.
+
+When private memory is enabled, this ioctl is used to register guest private
+memory region and the addr/size of kvm_enc_region represents guest physical
+address (GPA). In this usage, this ioctl zaps the existing guest memory
+mappings in KVM that fall into the region.
+
+When SEV-encrypted memory is enabled, this ioctl is used to register guest
+memory region which may contain encrypted data for a SEV-enabled guest. The
+addr/size of kvm_enc_region represents userspace address (HVA). The SEV
+memory encryption engine uses a tweak such that two identical plaintext pages,
+each at different locations will have differing ciphertexts. So swapping or
  moving ciphertext of those pages will not result in plaintext being
  swapped. So relocating (or migrating) physical backing pages for the SEV
  guest will require some additional steps.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dae190e19fce..92120e3a224e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
  #include 
  
  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ZAP_GFN_RANGE
  
  #define KVM_MAX_VCPUS 1024
  
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1f160801e2a7..05861b9656a4 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,7 @@ config KVM
select HAVE_KVM_PM_NOTIFIER if PM
select HAVE_KVM_PRIVATE_MEM if X86_64
select MEMFILE_NOTIFIER if HAVE_KVM_PRIVATE_MEM
+   select XARRAY_MULTI if HAVE_KVM_PRIVATE_MEM
help
  Support hosting fully virtualized guest machines using hardware
  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a99acec925eb..428cd2e88cbd 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -209,8 +209,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return -(u32)fault & errcode;
  }
  
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-
  int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
  
  int kvm_mmu_post_init_vm(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1b203c8aa696..da33f8828456 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,6 +260,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
  #endif
  
+#ifdef __KVM_HAVE_ZAP_GFN_RANGE
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
+#endif
+
  enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
@@ -795,6 +799,9 @@ struct kvm {
struct notifier_block pm_notifier;
  #endif
char stats_id[KVM_STATS_NAME_SIZE];
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   struct xarray mem_attr_array;
+#endif
  };
  
  #define kvm_err(fmt, ...) \
@@ -1459,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
  int kvm_arch_post_init_vm(struct kvm *kvm);
  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
  int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+bool kvm_arch_private_mem_supported(struct kvm *kvm);
  
  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
  /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

[PATCH v7 11/14] KVM: Register/unregister the guest private memory regions

2022-07-06 Thread Chao Peng
If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
ioctls. The patch reuses the existing SEV ioctls but differs in that the
address in the region for private memory is a GPA, while in the SEV case it's an HVA.

The private memory regions are stored as an xarray in KVM for memory
efficiency in normal usages, and zapping existing memory mappings is also
a side effect of these two ioctls.

Signed-off-by: Chao Peng 
---
 Documentation/virt/kvm/api.rst  | 17 +++---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Kconfig|  1 +
 arch/x86/kvm/mmu.h  |  2 --
 include/linux/kvm_host.h|  8 +
 virt/kvm/kvm_main.c | 57 +
 6 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 5ecfc7fbe0ee..dfb4caecab73 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4715,10 +4715,19 @@ Documentation/virt/kvm/amd-memory-encryption.rst.
 This ioctl can be used to register a guest memory region which may
 contain encrypted data (e.g. guest RAM, SMRAM etc).
 
-It is used in the SEV-enabled guest. When encryption is enabled, a guest
-memory region may contain encrypted data. The SEV memory encryption
-engine uses a tweak such that two identical plaintext pages, each at
-different locations will have differing ciphertexts. So swapping or
+Currently this ioctl supports registering memory regions for two usages:
+private memory and SEV-encrypted memory.
+
+When private memory is enabled, this ioctl is used to register guest private
+memory region and the addr/size of kvm_enc_region represents guest physical
+address (GPA). In this usage, this ioctl zaps the existing guest memory
+mappings in KVM that fall into the region.
+
+When SEV-encrypted memory is enabled, this ioctl is used to register guest
+memory region which may contain encrypted data for a SEV-enabled guest. The
+addr/size of kvm_enc_region represents userspace address (HVA). The SEV
+memory encryption engine uses a tweak such that two identical plaintext pages,
+each at different locations will have differing ciphertexts. So swapping or
 moving ciphertext of those pages will not result in plaintext being
 swapped. So relocating (or migrating) physical backing pages for the SEV
 guest will require some additional steps.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dae190e19fce..92120e3a224e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
 #include 
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ZAP_GFN_RANGE
 
 #define KVM_MAX_VCPUS 1024
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1f160801e2a7..05861b9656a4 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,7 @@ config KVM
select HAVE_KVM_PM_NOTIFIER if PM
select HAVE_KVM_PRIVATE_MEM if X86_64
select MEMFILE_NOTIFIER if HAVE_KVM_PRIVATE_MEM
+   select XARRAY_MULTI if HAVE_KVM_PRIVATE_MEM
help
  Support hosting fully virtualized guest machines using hardware
  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a99acec925eb..428cd2e88cbd 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -209,8 +209,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return -(u32)fault & errcode;
 }
 
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_post_init_vm(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1b203c8aa696..da33f8828456 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,6 +260,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 #endif
 
+#ifdef __KVM_HAVE_ZAP_GFN_RANGE
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
+#endif
+
 enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
@@ -795,6 +799,9 @@ struct kvm {
struct notifier_block pm_notifier;
 #endif
char stats_id[KVM_STATS_NAME_SIZE];
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+   struct xarray mem_attr_array;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -1459,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_post_init_vm(struct kvm *kvm);
 void kvm_arch_pre_destroy_vm(struct kvm *kvm);
 int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+bool kvm_arch_private_mem_supported(struct kvm *kvm);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 230c8ff9659c..bb714c2a4b06 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c