Re: [PATCH 08/34] KVM: Introduce KVM_SET_USER_MEMORY_REGION2

2023-11-06 Thread Yuan Yao
On Sun, Nov 05, 2023 at 05:30:11PM +0100, Paolo Bonzini wrote:
> From: Sean Christopherson 
>
> Introduce a "version 2" of KVM_SET_USER_MEMORY_REGION so that additional
> information can be supplied without setting userspace up to fail.  The
> padding in the new kvm_userspace_memory_region2 structure will be used to
> pass a file descriptor in addition to the userspace_addr, i.e. allow
> userspace to point at a file descriptor and map memory into a guest that
> is NOT mapped into host userspace.
>
> Alternatively, KVM could simply add "struct kvm_userspace_memory_region2"
> without a new ioctl(), but as Paolo pointed out, adding a new ioctl()
> makes detection of bad flags a bit more robust, e.g. if the new fd field
> is guarded only by a flag and not a new ioctl(), then a userspace bug
> (setting a "bad" flag) would generate out-of-bounds access instead of an
> -EINVAL error.
>
> Cc: Jarkko Sakkinen 
> Reviewed-by: Paolo Bonzini 
> Reviewed-by: Xiaoyao Li 
> Signed-off-by: Sean Christopherson 
> Reviewed-by: Fuad Tabba 
> Tested-by: Fuad Tabba 
> Message-Id: <20231027182217.3615211-9-sea...@google.com>
> Signed-off-by: Paolo Bonzini 
> ---
>  Documentation/virt/kvm/api.rst | 22 +
>  arch/x86/kvm/x86.c |  2 +-
>  include/linux/kvm_host.h   |  4 +--
>  include/uapi/linux/kvm.h   | 13 
>  virt/kvm/kvm_main.c| 57 +-
>  5 files changed, 87 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 7025b3751027..bdea1423c5f8 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -1340,6 +1340,7 @@ yet and must be cleared on entry.
>   __u64 guest_phys_addr;
>   __u64 memory_size; /* bytes */
>   __u64 userspace_addr; /* start of the userspace allocated memory */
> + __u64 pad[16];

Looks incorrect to add padding part in kvm_userspace_memory_region,
only need to apply on kvm_userspace_memory_region2 below.

>};
>
>/* for kvm_userspace_memory_region::flags */
> @@ -6192,6 +6193,27 @@ to know what fields can be changed for the system 
> register described by
>  ``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a
>  superset of the features supported by the system.
>
> +4.140 KVM_SET_USER_MEMORY_REGION2
> +-
> +
> +:Capability: KVM_CAP_USER_MEMORY2
> +:Architectures: all
> +:Type: vm ioctl
> +:Parameters: struct kvm_userspace_memory_region2 (in)
> +:Returns: 0 on success, -1 on error
> +
> +::
> +
> +  struct kvm_userspace_memory_region2 {
> + __u32 slot;
> + __u32 flags;
> + __u64 guest_phys_addr;
> + __u64 memory_size; /* bytes */
> + __u64 userspace_addr; /* start of the userspace allocated memory */
> +  };
> +
> +See KVM_SET_USER_MEMORY_REGION.
> +
>  5. The kvm_run structure
>  
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2c924075f6f1..7b389f27dffc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12576,7 +12576,7 @@ void __user * __x86_set_memory_region(struct kvm 
> *kvm, int id, gpa_t gpa,
>   }
>
>   for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> - struct kvm_userspace_memory_region m;
> + struct kvm_userspace_memory_region2 m;
>
>   m.slot = id | (i << 16);
>   m.flags = 0;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5faba69403ac..4e741ff27af3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1146,9 +1146,9 @@ enum kvm_mr_change {
>  };
>
>  int kvm_set_memory_region(struct kvm *kvm,
> -   const struct kvm_userspace_memory_region *mem);
> +   const struct kvm_userspace_memory_region2 *mem);
>  int __kvm_set_memory_region(struct kvm *kvm,
> - const struct kvm_userspace_memory_region *mem);
> + const struct kvm_userspace_memory_region2 *mem);
>  void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
>  void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
>  int kvm_arch_prepare_memory_region(struct kvm *kvm,
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 211b86de35ac..308cc70bd6ab 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -95,6 +95,16 @@ struct kvm_userspace_memory_region {
>   __u64 userspace_addr; /* start of the userspace allocated memory */
>  };
>
> +/* for KVM_SET_USER_MEMORY_REGION2 */
> +struct kvm_userspace_memory_region2 {
> + __u32 slot;
> + __u32 flags;
> + __u64 guest_phys_addr;
> + __u64 memory_size;
> + __u64 userspace_addr;
> + __u64 pad[16];
> +};
> +
>  /*
>   * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
>   * userspace, other bits are reserved for kvm internal use which are defined
> @@ -1201,6 +1211,7 @@ 

Re: [RFC PATCH v11 12/29] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory

2023-07-21 Thread Yuan Yao
On Tue, Jul 18, 2023 at 04:44:55PM -0700, Sean Christopherson wrote:
> TODO
>
> Cc: Fuad Tabba 
> Cc: Vishal Annapurve 
> Cc: Ackerley Tng 
> Cc: Jarkko Sakkinen 
> Cc: Maciej Szmigiero 
> Cc: Vlastimil Babka 
> Cc: David Hildenbrand 
> Cc: Quentin Perret 
> Cc: Michael Roth 
> Cc: Wang 
> Cc: Liam Merwick 
> Cc: Isaku Yamahata 
> Co-developed-by: Kirill A. Shutemov 
> Signed-off-by: Kirill A. Shutemov 
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Co-developed-by: Chao Peng 
> Signed-off-by: Chao Peng 
> Co-developed-by: Ackerley Tng 
> Signed-off-by: Ackerley Tng 
> Signed-off-by: Sean Christopherson 
> ---
>  include/linux/kvm_host.h   |  48 +++
>  include/uapi/linux/kvm.h   |  14 +-
>  include/uapi/linux/magic.h |   1 +
>  virt/kvm/Kconfig   |   4 +
>  virt/kvm/Makefile.kvm  |   1 +
>  virt/kvm/guest_mem.c   | 591 +
>  virt/kvm/kvm_main.c|  58 +++-
>  virt/kvm/kvm_mm.h  |  38 +++
>  8 files changed, 750 insertions(+), 5 deletions(-)
>  create mode 100644 virt/kvm/guest_mem.c
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 97db63da6227..0d1e2ee8ae7a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -592,8 +592,20 @@ struct kvm_memory_slot {
>   u32 flags;
>   short id;
>   u16 as_id;
> +
> +#ifdef CONFIG_KVM_PRIVATE_MEM
> + struct {
> + struct file __rcu *file;
> + pgoff_t pgoff;
> + } gmem;
> +#endif
>  };
>
> +static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot 
> *slot)
> +{
> + return slot && (slot->flags & KVM_MEM_PRIVATE);
> +}
> +
>  static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot 
> *slot)
>  {
>   return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
> @@ -688,6 +700,17 @@ static inline int kvm_arch_vcpu_memslots_id(struct 
> kvm_vcpu *vcpu)
>  }
>  #endif
>
> +/*
> + * Arch code must define kvm_arch_has_private_mem if support for private 
> memory
> + * is enabled.
> + */
> +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> +static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> +{
> + return false;
> +}
> +#endif
> +
>  struct kvm_memslots {
>   u64 generation;
>   atomic_long_t last_used_slot;
> @@ -1380,6 +1403,7 @@ void *kvm_mmu_memory_cache_alloc(struct 
> kvm_mmu_memory_cache *mc);
>  void kvm_mmu_invalidate_begin(struct kvm *kvm);
>  void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
>  void kvm_mmu_invalidate_end(struct kvm *kvm);
> +bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
>
>  long kvm_arch_dev_ioctl(struct file *filp,
>   unsigned int ioctl, unsigned long arg);
> @@ -2313,6 +2337,30 @@ static inline unsigned long 
> kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
>
>  bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
>struct kvm_gfn_range *range);
> +
> +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> +{
> + return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
> +kvm_get_memory_attributes(kvm, gfn) & 
> KVM_MEMORY_ATTRIBUTE_PRIVATE;
> +}
> +#else
> +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> +{
> + return false;
> +}
>  #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
>
> +#ifdef CONFIG_KVM_PRIVATE_MEM
> +int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
> +   gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
> +#else
> +static inline int kvm_gmem_get_pfn(struct kvm *kvm,
> +struct kvm_memory_slot *slot, gfn_t gfn,
> +kvm_pfn_t *pfn, int *max_order)
> +{
> + KVM_BUG_ON(1, kvm);
> + return -EIO;
> +}
> +#endif /* CONFIG_KVM_PRIVATE_MEM */
> +
>  #endif
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index f065c57db327..9b344fc98598 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -102,7 +102,10 @@ struct kvm_userspace_memory_region2 {
>   __u64 guest_phys_addr;
>   __u64 memory_size;
>   __u64 userspace_addr;
> - __u64 pad[16];
> + __u64 gmem_offset;
> + __u32 gmem_fd;
> + __u32 pad1;
> + __u64 pad2[14];
>  };
>
>  /*
> @@ -112,6 +115,7 @@ struct kvm_userspace_memory_region2 {
>   */
>  #define KVM_MEM_LOG_DIRTY_PAGES  (1UL << 0)
>  #define KVM_MEM_READONLY (1UL << 1)
> +#define KVM_MEM_PRIVATE  (1UL << 2)
>
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> @@ -2284,4 +2288,12 @@ struct kvm_memory_attributes {
>
>  #define KVM_MEMORY_ATTRIBUTE_PRIVATE   (1ULL << 3)
>
> +#define KVM_CREATE_GUEST_MEMFD   _IOWR(KVMIO,  0xd4, struct 
> kvm_create_guest_memfd)
> +
> +struct kvm_create_guest_memfd {
> + __u64 size;
> + __u64 flags;
> + __u64 reserved[6];
> +};
> +
>  #endif /* __LINUX_KVM_H */

Re: [RFC PATCH v11 08/29] KVM: Introduce per-page memory attributes

2023-07-20 Thread Yuan Yao
On Tue, Jul 18, 2023 at 04:44:51PM -0700, Sean Christopherson wrote:
> From: Chao Peng 
>
> In confidential computing usages, whether a page is private or shared is
> necessary information for KVM to perform operations like page fault
> handling, page zapping etc. There are other potential use cases for
> per-page memory attributes, e.g. to make memory read-only (or no-exec,
> or exec-only, etc.) without having to modify memslots.
>
> Introduce two ioctls (advertised by KVM_CAP_MEMORY_ATTRIBUTES) to allow
> userspace to operate on the per-page memory attributes.
>   - KVM_SET_MEMORY_ATTRIBUTES to set the per-page memory attributes to
> a guest memory range.
>   - KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES to return the KVM supported
> memory attributes.
>
> Use an xarray to store the per-page attributes internally, with a naive,
> not fully optimized implementation, i.e. prioritize correctness over
> performance for the initial implementation.
>
> Because setting memory attributes is roughly analogous to mprotect() on
> memory that is mapped into the guest, zap existing mappings prior to
> updating the memory attributes.  Opportunistically provide an arch hook
> for the post-set path (needed to complete invalidation anyways) in
> anticipation of x86 needing the hook to update metadata related to
> determining whether or not a given gfn can be backed with various sizes
> of hugepages.
>
> It's possible that future usages may not require an invalidation, e.g.
> if KVM ends up supporting RWX protections and userspace grants _more_
> protections, but again opt for simplicity and punt optimizations to
> if/when they are needed.
>
> Suggested-by: Sean Christopherson 
> Link: https://lore.kernel.org/all/y2wb48kd0j4vg...@google.com
> Cc: Fuad Tabba 
> Signed-off-by: Chao Peng 
> Co-developed-by: Sean Christopherson 
> Signed-off-by: Sean Christopherson 
> ---
>  Documentation/virt/kvm/api.rst |  60 
>  include/linux/kvm_host.h   |  14 +++
>  include/uapi/linux/kvm.h   |  14 +++
>  virt/kvm/Kconfig   |   4 +
>  virt/kvm/kvm_main.c| 170 +
>  5 files changed, 262 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 34d4ce66e0c8..0ca8561775ac 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6068,6 +6068,56 @@ writes to the CNTVCT_EL0 and CNTPCT_EL0 registers 
> using the SET_ONE_REG
>  interface. No error will be returned, but the resulting offset will not be
>  applied.
>
> +4.139 KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES
> +-
> +
> +:Capability: KVM_CAP_MEMORY_ATTRIBUTES
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: u64 memory attributes bitmask(out)
> +:Returns: 0 on success, <0 on error
> +
> +Returns supported memory attributes bitmask. Supported memory attributes will
> +have the corresponding bits set in u64 memory attributes bitmask.
> +
> +The following memory attributes are defined::
> +
> +  #define KVM_MEMORY_ATTRIBUTE_PRIVATE   (1ULL << 3)
> +
> +4.140 KVM_SET_MEMORY_ATTRIBUTES
> +-
> +
> +:Capability: KVM_CAP_MEMORY_ATTRIBUTES
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_memory_attributes(in/out)
> +:Returns: 0 on success, <0 on error
> +
> +Sets memory attributes for pages in a guest memory range. Parameters are
> +specified via the following structure::
> +
> +  struct kvm_memory_attributes {
> + __u64 address;
> + __u64 size;
> + __u64 attributes;
> + __u64 flags;
> +  };
> +
> +The user sets the per-page memory attributes to a guest memory range 
> indicated
> +by address/size, and in return KVM adjusts address and size to reflect the
> +actual pages of the memory range have been successfully set to the 
> attributes.
> +If the call returns 0, "address" is updated to the last successful address + 
> 1
> +and "size" is updated to the remaining address size that has not been set
> +successfully. The user should check the return value as well as the size to
> +decide if the operation succeeded for the whole range or not. The user may 
> want
> +to retry the operation with the returned address/size if the previous range 
> was
> +partially successful.
> +
> +Both address and size should be page aligned and the supported attributes 
> can be
> +retrieved with KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES.
> +
> +The "flags" field may be used for future extensions and should be set to 0s.
> +
>  5. The kvm_run structure
>  
>
> @@ -8494,6 +8544,16 @@ block sizes is exposed in 
> KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
>  64-bit bitmap (each bit describing a block size). The default value is
>  0, to disable the eager page splitting.
>
> +8.41 KVM_CAP_MEMORY_ATTRIBUTES
> +--
> +
> +:Capability: KVM_CAP_MEMORY_ATTRIBUTES
> +:Architectures: x86
> +:Type: vm
> +
> +This 

Re: [RFC PATCH v11 05/29] KVM: Convert KVM_ARCH_WANT_MMU_NOTIFIER to CONFIG_KVM_GENERIC_MMU_NOTIFIER

2023-07-19 Thread Yuan Yao
On Wed, Jul 19, 2023 at 07:15:09AM -0700, Sean Christopherson wrote:
> On Wed, Jul 19, 2023, Yuan Yao wrote:
> > On Tue, Jul 18, 2023 at 04:44:48PM -0700, Sean Christopherson wrote:
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index 90a0be261a5c..d2d3e083ec7f 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -255,7 +255,9 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t 
> > > cr2_or_gpa,
> > >  int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
> > >  #endif
> > >
> > > -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
> > > +struct kvm_gfn_range;
> >
> > Not sure why a declaration here, it's defined for ARCHs which defined
> > KVM_ARCH_WANT_MMU_NOTIFIER before.
>
> The forward declaration exists to handle cases where CONFIG_KVM=n, 
> specifically
> arch/powerpc/include/asm/kvm_ppc.h's declaration of hooks to forward calls to
> uarch modules:
>
>   bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
>   bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
>   bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
>   bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
>
> Prior to using a Kconfig, a forward declaration wasn't necessary because
> arch/powerpc/include/asm/kvm_host.h would #define KVM_ARCH_WANT_MMU_NOTIFIER 
> even
> if CONFIG_KVM=n.
>
> Alternatively, kvm_ppc.h could declare the struct.  I went this route mainly 
> to
> avoid the possibility of someone encountering the same problem on a different
> architecture.

Ah I see, thanks for your explanation!


Re: [RFC PATCH v11 07/29] KVM: Add KVM_EXIT_MEMORY_FAULT exit

2023-07-19 Thread Yuan Yao
On Tue, Jul 18, 2023 at 04:44:50PM -0700, Sean Christopherson wrote:
> From: Chao Peng 
>
> This new KVM exit allows userspace to handle memory-related errors. It
> indicates an error happens in KVM at guest memory range [gpa, gpa+size).
> The flags includes additional information for userspace to handle the
> error. Currently bit 0 is defined as 'private memory' where '1'
> indicates error happens due to private memory access and '0' indicates
> error happens due to shared memory access.

Now it's bit 3:

#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)

I remember some other attributes were introduced in v10 yet:

#define KVM_MEMORY_ATTRIBUTE_READ  (1ULL << 0)
#define KVM_MEMORY_ATTRIBUTE_WRITE (1ULL << 1)
#define KVM_MEMORY_ATTRIBUTE_EXECUTE   (1ULL << 2)
#define KVM_MEMORY_ATTRIBUTE_PRIVATE   (1ULL << 3)

So KVM_MEMORY_EXIT_FLAG_PRIVATE changed to bit 3 due to above things,
or other reason ? (Sorry I didn't follow v10 too much before).

>
> When private memory is enabled, this new exit will be used for KVM to
> exit to userspace for shared <-> private memory conversion in memory
> encryption usage. In such usage, typically there are two kind of memory
> conversions:
>   - explicit conversion: happens when guest explicitly calls into KVM
> to map a range (as private or shared), KVM then exits to userspace
> to perform the map/unmap operations.
>   - implicit conversion: happens in KVM page fault handler where KVM
> exits to userspace for an implicit conversion when the page is in a
> different state than requested (private or shared).
>
> Suggested-by: Sean Christopherson 
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> Reviewed-by: Fuad Tabba 
> Tested-by: Fuad Tabba 
> Signed-off-by: Sean Christopherson 
> ---
>  Documentation/virt/kvm/api.rst | 22 ++
>  include/uapi/linux/kvm.h   |  8 
>  2 files changed, 30 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index c0ddd3035462..34d4ce66e0c8 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6700,6 +6700,28 @@ array field represents return values. The userspace 
> should update the return
>  values of SBI call before resuming the VCPU. For more details on RISC-V SBI
>  spec refer, https://github.com/riscv/riscv-sbi-doc.
>
> +::
> +
> + /* KVM_EXIT_MEMORY_FAULT */
> + struct {
> +  #define KVM_MEMORY_EXIT_FLAG_PRIVATE   (1ULL << 3)
> + __u64 flags;
> + __u64 gpa;
> + __u64 size;
> + } memory;
> +
> +If exit reason is KVM_EXIT_MEMORY_FAULT then it indicates that the VCPU has
> +encountered a memory error which is not handled by KVM kernel module and
> +userspace may choose to handle it. The 'flags' field indicates the memory
> +properties of the exit.
> +
> + - KVM_MEMORY_EXIT_FLAG_PRIVATE - indicates the memory error is caused by
> +   private memory access when the bit is set. Otherwise the memory error is
> +   caused by shared memory access when the bit is clear.
> +
> +'gpa' and 'size' indicate the memory range the error occurs at. The userspace
> +may handle the error and return to KVM to retry the previous memory access.
> +
>  ::
>
>  /* KVM_EXIT_NOTIFY */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4d4b3de8ac55..6c6ed214b6ac 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -274,6 +274,7 @@ struct kvm_xen_exit {
>  #define KVM_EXIT_RISCV_SBI35
>  #define KVM_EXIT_RISCV_CSR36
>  #define KVM_EXIT_NOTIFY   37
> +#define KVM_EXIT_MEMORY_FAULT 38
>
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> @@ -520,6 +521,13 @@ struct kvm_run {
>  #define KVM_NOTIFY_CONTEXT_INVALID   (1 << 0)
>   __u32 flags;
>   } notify;
> + /* KVM_EXIT_MEMORY_FAULT */
> + struct {
> +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
> + __u64 flags;
> + __u64 gpa;
> + __u64 size;
> + } memory;
>   /* Fix the size of the union. */
>   char padding[256];
>   };
> --
> 2.41.0.255.g8b1d071c50-goog
>


Re: [RFC PATCH v11 05/29] KVM: Convert KVM_ARCH_WANT_MMU_NOTIFIER to CONFIG_KVM_GENERIC_MMU_NOTIFIER

2023-07-19 Thread Yuan Yao
On Tue, Jul 18, 2023 at 04:44:48PM -0700, Sean Christopherson wrote:
> Signed-off-by: Sean Christopherson 
> ---
>  arch/arm64/include/asm/kvm_host.h   |  2 --
>  arch/arm64/kvm/Kconfig  |  2 +-
>  arch/mips/include/asm/kvm_host.h|  2 --
>  arch/mips/kvm/Kconfig   |  2 +-
>  arch/powerpc/include/asm/kvm_host.h |  2 --
>  arch/powerpc/kvm/Kconfig|  8 
>  arch/powerpc/kvm/powerpc.c  |  4 +---
>  arch/riscv/include/asm/kvm_host.h   |  2 --
>  arch/riscv/kvm/Kconfig  |  2 +-
>  arch/x86/include/asm/kvm_host.h |  2 --
>  arch/x86/kvm/Kconfig|  2 +-
>  include/linux/kvm_host.h|  8 +---
>  virt/kvm/Kconfig|  4 
>  virt/kvm/kvm_main.c | 10 +-
>  14 files changed, 23 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 8b6096753740..50d89d400bf1 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -912,8 +912,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
>  int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_events *events);
>
> -#define KVM_ARCH_WANT_MMU_NOTIFIER
> -
>  void kvm_arm_halt_guest(struct kvm *kvm);
>  void kvm_arm_resume_guest(struct kvm *kvm);
>
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index f531da6b362e..a650b46f4f2f 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -22,7 +22,7 @@ menuconfig KVM
>   bool "Kernel-based Virtual Machine (KVM) support"
>   depends on HAVE_KVM
>   select KVM_GENERIC_HARDWARE_ENABLING
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>   select PREEMPT_NOTIFIERS
>   select HAVE_KVM_CPU_RELAX_INTERCEPT
>   select HAVE_KVM_ARCH_TLB_FLUSH_ALL
> diff --git a/arch/mips/include/asm/kvm_host.h 
> b/arch/mips/include/asm/kvm_host.h
> index 04cedf9f8811..22a41d941bf3 100644
> --- a/arch/mips/include/asm/kvm_host.h
> +++ b/arch/mips/include/asm/kvm_host.h
> @@ -810,8 +810,6 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t 
> start_gfn, gfn_t end_gfn);
>  pgd_t *kvm_pgd_alloc(void);
>  void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
>
> -#define KVM_ARCH_WANT_MMU_NOTIFIER
> -
>  /* Emulation */
>  enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
>  int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
> diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
> index a8cdba75f98d..c04987d2ed2e 100644
> --- a/arch/mips/kvm/Kconfig
> +++ b/arch/mips/kvm/Kconfig
> @@ -25,7 +25,7 @@ config KVM
>   select HAVE_KVM_EVENTFD
>   select HAVE_KVM_VCPU_ASYNC_IOCTL
>   select KVM_MMIO
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>   select INTERVAL_TREE
>   select KVM_GENERIC_HARDWARE_ENABLING
>   help
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 14ee0dece853..4b5c3f2acf78 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -62,8 +62,6 @@
>
>  #include 
>
> -#define KVM_ARCH_WANT_MMU_NOTIFIER
> -
>  #define HPTEG_CACHE_NUM  (1 << 15)
>  #define HPTEG_HASH_BITS_PTE  13
>  #define HPTEG_HASH_BITS_PTE_LONG 12
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 902611954200..b33358ee6424 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -42,7 +42,7 @@ config KVM_BOOK3S_64_HANDLER
>  config KVM_BOOK3S_PR_POSSIBLE
>   bool
>   select KVM_MMIO
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>
>  config KVM_BOOK3S_HV_POSSIBLE
>   bool
> @@ -85,7 +85,7 @@ config KVM_BOOK3S_64_HV
>   tristate "KVM for POWER7 and later using hypervisor mode in host"
>   depends on KVM_BOOK3S_64 && PPC_POWERNV
>   select KVM_BOOK3S_HV_POSSIBLE
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>   select CMA
>   help
> Support running unmodified book3s_64 guest kernels in
> @@ -194,7 +194,7 @@ config KVM_E500V2
>   depends on !CONTEXT_TRACKING_USER
>   select KVM
>   select KVM_MMIO
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>   help
> Support running unmodified E500 guest kernels in virtual machines on
> E500v2 host processors.
> @@ -211,7 +211,7 @@ config KVM_E500MC
>   select KVM
>   select KVM_MMIO
>   select KVM_BOOKE_HV
> - select MMU_NOTIFIER
> + select KVM_GENERIC_MMU_NOTIFIER
>   help
> Support running unmodified E500MC/E5500/E6500 guest kernels in
> virtual machines on E500MC/E5500/E6500 host processors.
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 5cf9e5e3112a..f97fbac7eac9 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ 

Re: [PATCH 08/44] KVM: x86: Move hardware setup/unsetup to init/exit

2022-11-04 Thread Yuan Yao
On Wed, Nov 02, 2022 at 11:18:35PM +, Sean Christopherson wrote:
> Now that kvm_arch_hardware_setup() is called immediately after
> kvm_arch_init(), fold the guts of kvm_arch_hardware_(un)setup() into
> kvm_arch_{init,exit}() as a step towards dropping one of the hooks.
>
> To avoid having to unwind various setup, e.g registration of several
> notifiers, slot in the vendor hardware setup before the registration of
> said notifiers and callbacks.  Introducing a functional change while
> moving code is less than ideal, but the alternative is adding a pile of
> unwinding code, which is much more error prone, e.g. several attempts to
> move the setup code verbatim all introduced bugs.
>
> Add a comment to document that kvm_ops_update() is effectively the point
> of no return, e.g. it sets the kvm_x86_ops.hardware_enable canary and so
> needs to be unwound.
>
> Signed-off-by: Sean Christopherson 
> ---
>  arch/x86/kvm/x86.c | 121 +++--
>  1 file changed, 63 insertions(+), 58 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9a7702b1c563..80ee580a9cd4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9252,6 +9252,24 @@ static struct notifier_block pvclock_gtod_notifier = {
>  };
>  #endif
>
> +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
> +{
> + memcpy(_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
> +
> +#define __KVM_X86_OP(func) \
> + static_call_update(kvm_x86_##func, kvm_x86_ops.func);
> +#define KVM_X86_OP(func) \
> + WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
> +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
> +#define KVM_X86_OP_OPTIONAL_RET0(func) \
> + static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
> +(void *)__static_call_return0);
> +#include 
> +#undef __KVM_X86_OP
> +
> + kvm_pmu_ops_update(ops->pmu_ops);
> +}
> +
>  int kvm_arch_init(void *opaque)
>  {
>   struct kvm_x86_init_ops *ops = opaque;
> @@ -9325,6 +9343,24 @@ int kvm_arch_init(void *opaque)
>   kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
>   }
>
> + rdmsrl_safe(MSR_EFER, _efer);
> +
> + if (boot_cpu_has(X86_FEATURE_XSAVES))
> + rdmsrl(MSR_IA32_XSS, host_xss);
> +
> + kvm_init_pmu_capability();
> +
> + r = ops->hardware_setup();
> + if (r != 0)
> + goto out_mmu_exit;

The failure case of ops->hardware_setup() is unwound
by kvm_arch_exit() before this patch, do we need to
keep that old behavior ?

> +
> + /*
> +  * Point of no return!  DO NOT add error paths below this point unless
> +  * absolutely necessary, as most operations from this point forward
> +  * require unwinding.
> +  */
> + kvm_ops_update(ops);
> +
>   kvm_timer_init();
>
>   if (pi_inject_timer == -1)
> @@ -9336,8 +9372,32 @@ int kvm_arch_init(void *opaque)
>   set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
>  #endif
>
> + kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
> +
> + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> + kvm_caps.supported_xss = 0;
> +
> +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
> + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
> +#undef __kvm_cpu_cap_has
> +
> + if (kvm_caps.has_tsc_control) {
> + /*
> +  * Make sure the user can only configure tsc_khz values that
> +  * fit into a signed integer.
> +  * A min value is not calculated because it will always
> +  * be 1 on all machines.
> +  */
> + u64 max = min(0x7fffULL,
> +   __scale_tsc(kvm_caps.max_tsc_scaling_ratio, 
> tsc_khz));
> + kvm_caps.max_guest_tsc_khz = max;
> + }
> + kvm_caps.default_tsc_scaling_ratio = 1ULL << 
> kvm_caps.tsc_scaling_ratio_frac_bits;
> + kvm_init_msr_list();
>   return 0;
>
> +out_mmu_exit:
> + kvm_mmu_vendor_module_exit();
>  out_free_percpu:
>   free_percpu(user_return_msrs);
>  out_free_x86_emulator_cache:
> @@ -9347,6 +9407,8 @@ int kvm_arch_init(void *opaque)
>
>  void kvm_arch_exit(void)
>  {
> + kvm_unregister_perf_callbacks();
> +
>  #ifdef CONFIG_X86_64
>   if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
>   clear_hv_tscchange_cb();
> @@ -9362,6 +9424,7 @@ void kvm_arch_exit(void)
>   irq_work_sync(_irq_work);
>   cancel_work_sync(_gtod_work);
>  #endif
> + static_call(kvm_x86_hardware_unsetup)();
>   kvm_x86_ops.hardware_enable = NULL;
>   kvm_mmu_vendor_module_exit();
>   free_percpu(user_return_msrs);
> @@ -11922,72 +11985,14 @@ void kvm_arch_hardware_disable(void)
>   drop_user_return_notifiers();
>  }
>
> -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
> -{
> - memcpy(_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
> -
> -#define __KVM_X86_OP(func) \
> - 

Re: [PATCH 03/44] KVM: Allocate cpus_hardware_enabled after arch hardware setup

2022-11-04 Thread Yuan Yao
On Wed, Nov 02, 2022 at 11:18:30PM +, Sean Christopherson wrote:
> Allocate cpus_hardware_enabled after arch hardware setup so that arch
> "init" and "hardware setup" are called back-to-back and thus can be
> combined in a future patch.  cpus_hardware_enabled is never used before
> kvm_create_vm(), i.e. doesn't have a dependency with hardware setup and
> only needs to be allocated before /dev/kvm is exposed to userspace.
>
> Free the object before the arch hooks are invoked to maintain symmetry,
> and so that arch code can move away from the hooks without having to
> worry about ordering changes.
>
> Signed-off-by: Sean Christopherson 
> ---
>  virt/kvm/kvm_main.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e0424af52acc..8b7534cc953b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -5843,15 +5843,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, 
> unsigned vcpu_align,
>   if (r)
>   return r;
>
> + r = kvm_arch_hardware_setup(opaque);
> + if (r < 0)
> + goto err_hw_setup;
> +
>   if (!zalloc_cpumask_var(_hardware_enabled, GFP_KERNEL)) {
>   r = -ENOMEM;
>   goto err_hw_enabled;
>   }
>
> - r = kvm_arch_hardware_setup(opaque);
> - if (r < 0)
> - goto out_free_1;
> -
>   c.ret = 
>   c.opaque = opaque;
>   for_each_online_cpu(cpu) {
> @@ -5937,10 +5937,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, 
> unsigned vcpu_align,
>   unregister_reboot_notifier(_reboot_notifier);
>   cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
>  out_free_2:
> - kvm_arch_hardware_unsetup();
> -out_free_1:
>   free_cpumask_var(cpus_hardware_enabled);
>  err_hw_enabled:
> + kvm_arch_hardware_unsetup();
> +err_hw_setup:
>   kvm_arch_exit();
>   return r;
>  }
> @@ -5967,9 +5967,9 @@ void kvm_exit(void)
>   cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
>   on_each_cpu(hardware_disable_nolock, NULL, 1);
>   kvm_irqfd_exit();
> + free_cpumask_var(cpus_hardware_enabled);
>   kvm_arch_hardware_unsetup();
>   kvm_arch_exit();
> - free_cpumask_var(cpus_hardware_enabled);
>   kvm_vfio_ops_exit();

Looks good to me.

Reviewed-by: Yuan Yao 

>  }
>  EXPORT_SYMBOL_GPL(kvm_exit);
> --
> 2.38.1.431.g37b22c650d-goog
>


[PATCH] powerpc/corenet32: enable DMA in defconfig

2015-07-22 Thread Yuan Yao
By default we enable DMA(CONFIG_FSL_DMA) support
which are needed on P2041RDB, P3041DS, P4080DS,
B4860QDS, etc.

Signed-off-by: Yuan Yao yao.y...@freescale.com
---
 arch/powerpc/configs/corenet32_smp_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/configs/corenet32_smp_defconfig 
b/arch/powerpc/configs/corenet32_smp_defconfig
index 3765993..0c175cc 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -143,6 +143,8 @@ CONFIG_RTC_DRV_DS1307=y
 CONFIG_RTC_DRV_DS1374=y
 CONFIG_RTC_DRV_DS3232=y
 CONFIG_UIO=y
+CONFIG_DMADEVICES=y
+CONFIG_FSL_DMA=y
 CONFIG_VIRT_DRIVERS=y
 CONFIG_FSL_HV_MANAGER=y
 CONFIG_STAGING=y
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev