Re: [RFC PATCH v2 5/6] KVM: PPC: Add support for nested PAPR guests

2023-06-09 Thread Jordan Niethe
On Wed, Jun 7, 2023 at 7:09 PM Nicholas Piggin  wrote:
[snip]
>
> You lost your comments.

Thanks

>
> > diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> > b/arch/powerpc/include/asm/kvm_book3s.h
> > index 0ca2d8b37b42..c5c57552b447 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > @@ -12,6 +12,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  struct kvmppc_bat {
> >   u64 raw;
> > @@ -316,6 +317,57 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu 
> > *vcpu);
> >
> >  void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
> >
> > +
> > +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> > +
> > +extern bool __kvmhv_on_papr;
> > +
> > +static inline bool kvmhv_on_papr(void)
> > +{
> > + return __kvmhv_on_papr;
> > +}
>
> It's a nitpick, but kvmhv_on_pseries() is because we're runnning KVM-HV
> on a pseries guest kernel. Which is a papr guest kernel. So this kind of
> doesn't make sense if you read it the same way.
>
> kvmhv_nested_using_papr() or something like that might read a bit
> better.

Will we go with kvmhv_using_nested_v2()?

>
> This could be a static key too.

Will do.

>
> > @@ -575,6 +593,7 @@ struct kvm_vcpu_arch {
> >   ulong dscr;
> >   ulong amr;
> >   ulong uamor;
> > + ulong amor;
> >   ulong iamr;
> >   u32 ctrl;
> >   u32 dabrx;
>
> This belongs somewhere else.

It can be dropped.

>
> > @@ -829,6 +848,8 @@ struct kvm_vcpu_arch {
> >   u64 nested_hfscr;   /* HFSCR that the L1 requested for the nested 
> > guest */
> >   u32 nested_vcpu_id;
> >   gpa_t nested_io_gpr;
> > + /* For nested APIv2 guests*/
> > + struct kvmhv_papr_host papr_host;
> >  #endif
>
> This is not exactly a papr host. Might have to come up with a better
> name especially if we implement a L0 things could get confusing.

Any name ideas? nestedv2_state?

>
> > @@ -342,6 +343,203 @@ static inline long 
> > plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
> >   return rc;
> >  }
> >
> > +static inline long plpar_guest_create(unsigned long flags, unsigned long 
> > *guest_id)
> > +{
> > + unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
> > + unsigned long token;
> > + long rc;
> > +
> > + token = -1UL;
> > + while (true) {
> > + rc = plpar_hcall(H_GUEST_CREATE, retbuf, flags, token);
> > + if (rc == H_SUCCESS) {
> > + *guest_id = retbuf[0];
> > + break;
> > + }
> > +
> > + if (rc == H_BUSY) {
> > + token = retbuf[0];
> > + cpu_relax();
> > + continue;
> > + }
> > +
> > + if (H_IS_LONG_BUSY(rc)) {
> > + token = retbuf[0];
> > + mdelay(get_longbusy_msecs(rc));
>
> All of these things need a non-sleeping delay? Can we sleep instead?
> Or if not, might have to think about going back to the caller and it
> can retry.
>
> get/set state might be a bit inconvenient, although I don't expect
> that should potentially take so long as guest and vcpu create/delete,
> so at least those ones would be good if they're called while
> preemptable.

Yeah no reason not to sleep except for get/set, let me try it out.

>
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 521d84621422..f22ee582e209 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -383,6 +383,11 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu 
> > *vcpu)
> >   spin_unlock_irqrestore(>arch.tbacct_lock, flags);
> >  }
> >
> > +static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
> > +{
> > + vcpu->arch.pvr = pvr;
> > +}
>
> Didn't you lose this in a previous patch? I thought it must have moved
> to a header but it reappears.

Yes, that was meant to stay put.

>
> > +
> >  /* Dummy value used in computing PCR value below */
> >  #define PCR_ARCH_31(PCR_ARCH_300 << 1)
> >
> > @@ -1262,13 +1267,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
> >   return RESUME_HOST;
> >   break;
> >  #endif
> > - case H_RANDOM:
> > + case H_RANDOM: {
> >   unsigned long rand;
> >
> >   if (!arch_get_random_seed_longs(, 1))
> >   ret = H_HARDWARE;
> >   kvmppc_set_gpr(vcpu, 4, rand);
> >   break;
> > + }
> >   case H_RPT_INVALIDATE:
> >   ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
> > kvmppc_get_gpr(vcpu, 5),
>
> Compile fix for a previous patch.

Thanks.

>
> > @@ -2921,14 +2927,21 @@ static int kvmppc_core_vcpu_create_hv(struct 
> > kvm_vcpu *vcpu)
> >   vcpu->arch.shared_big_endian = false;
> >  #endif
> >  #endif
> > - kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
> >
> > + if (kvmhv_on_papr()) {
> > + 

Re: [RFC PATCH v2 5/6] KVM: PPC: Add support for nested PAPR guests

2023-06-07 Thread Nicholas Piggin
On Mon Jun 5, 2023 at 4:48 PM AEST, Jordan Niethe wrote:
> A series of hcalls have been added to the PAPR which allow a regular
> guest partition to create and manage guest partitions of its own. Add
> support to KVM to utilize these hcalls to enable running nested guests.
>
> Overview of the new hcall usage:
>
> - L1 and L0 negotiate capabilities with
>   H_GUEST_{G,S}ET_CAPABILITIES()
>
> - L1 requests the L0 create a L2 with
>   H_GUEST_CREATE() and receives a handle to use in future hcalls
>
> - L1 requests the L0 create a L2 vCPU with
>   H_GUEST_CREATE_VCPU()
>
> - L1 sets up the L2 using H_GUEST_SET and the
>   H_GUEST_VCPU_RUN input buffer
>
> - L1 requests the L0 runs the L2 vCPU using H_GUEST_VCPU_RUN()
>
> - L2 returns to L1 with an exit reason and L1 reads the
>   H_GUEST_VCPU_RUN output buffer populated by the L0
>
> - L1 handles the exit using H_GET_STATE if necessary
>
> - L1 reruns L2 vCPU with H_GUEST_VCPU_RUN
>
> - L1 frees the L2 in the L0 with H_GUEST_DELETE()
>
> Support for the new API is determined by trying
> H_GUEST_GET_CAPABILITIES. On a successful return, the new API will then
> be used.
>
> Use the vcpu register state setters for tracking modified guest state
> elements and copy the thread wide values into the H_GUEST_VCPU_RUN input
> buffer immediately before running a L2. The guest wide
> elements can not be added to the input buffer so send them with a
> separate H_GUEST_SET call if necessary.
>
> Make the vcpu register getter load the corresponding value from the real
> host with H_GUEST_GET. To avoid unnecessarily calling H_GUEST_GET, track
> which values have already been loaded between H_GUEST_VCPU_RUN calls. If
> an element is present in the H_GUEST_VCPU_RUN output buffer it also does
> not need to be loaded again.
>
> There is existing support for running nested guests on KVM
> with powernv. However the interface used for this is not supported by
> other PAPR hosts. This existing API is still supported.
>
> Signed-off-by: Jordan Niethe 
> ---
> v2:
>   - Declare op structs as static
>   - Use expressions in switch case with local variables
>   - Do not use the PVR for the LOGICAL PVR ID
>   - Handle emul_inst as now a double word
>   - Use new GPR(), etc macros
>   - Determine PAPR nested capabilities from cpu features
> ---
>  arch/powerpc/include/asm/guest-state-buffer.h | 105 +-
>  arch/powerpc/include/asm/hvcall.h |  30 +
>  arch/powerpc/include/asm/kvm_book3s.h | 122 ++-
>  arch/powerpc/include/asm/kvm_book3s_64.h  |   6 +
>  arch/powerpc/include/asm/kvm_host.h   |  21 +
>  arch/powerpc/include/asm/kvm_ppc.h|  64 +-
>  arch/powerpc/include/asm/plpar_wrappers.h | 198 
>  arch/powerpc/kvm/Makefile |   1 +
>  arch/powerpc/kvm/book3s_hv.c  | 126 ++-
>  arch/powerpc/kvm/book3s_hv.h  |  74 +-
>  arch/powerpc/kvm/book3s_hv_nested.c   |  38 +-
>  arch/powerpc/kvm/book3s_hv_papr.c | 940 ++
>  arch/powerpc/kvm/emulate_loadstore.c  |   4 +-
>  arch/powerpc/kvm/guest-state-buffer.c |  49 +
>  14 files changed, 1684 insertions(+), 94 deletions(-)
>  create mode 100644 arch/powerpc/kvm/book3s_hv_papr.c
>
> diff --git a/arch/powerpc/include/asm/guest-state-buffer.h 
> b/arch/powerpc/include/asm/guest-state-buffer.h
> index 65a840abf1bb..116126edd8e2 100644
> --- a/arch/powerpc/include/asm/guest-state-buffer.h
> +++ b/arch/powerpc/include/asm/guest-state-buffer.h
> @@ -5,6 +5,7 @@
>  #ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H
>  #define _ASM_POWERPC_GUEST_STATE_BUFFER_H
>  
> +#include "asm/hvcall.h"
>  #include 
>  #include 
>  #include 
> @@ -14,16 +15,16 @@
>   **/
>  #define GSID_BLANK   0x
>  
> -#define GSID_HOST_STATE_SIZE 0x0001 /* Size of Hypervisor Internal 
> Format VCPU state */
> -#define GSID_RUN_OUTPUT_MIN_SIZE 0x0002 /* Minimum size of the Run VCPU 
> output buffer */
> -#define GSID_LOGICAL_PVR 0x0003 /* Logical PVR */
> -#define GSID_TB_OFFSET   0x0004 /* Timebase Offset */
> -#define GSID_PARTITION_TABLE 0x0005 /* Partition Scoped Page Table */
> -#define GSID_PROCESS_TABLE   0x0006 /* Process Table */
> +#define GSID_HOST_STATE_SIZE 0x0001
> +#define GSID_RUN_OUTPUT_MIN_SIZE 0x0002
> +#define GSID_LOGICAL_PVR 0x0003
> +#define GSID_TB_OFFSET   0x0004
> +#define GSID_PARTITION_TABLE 0x0005
> +#define GSID_PROCESS_TABLE   0x0006

You lost your comments.

> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index 0ca2d8b37b42..c5c57552b447 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  struct kvmppc_bat {
>   u64 raw;
> @@ 

[RFC PATCH v2 5/6] KVM: PPC: Add support for nested PAPR guests

2023-06-05 Thread Jordan Niethe
A series of hcalls have been added to the PAPR which allow a regular
guest partition to create and manage guest partitions of its own. Add
support to KVM to utilize these hcalls to enable running nested guests.

Overview of the new hcall usage:

- L1 and L0 negotiate capabilities with
  H_GUEST_{G,S}ET_CAPABILITIES()

- L1 requests the L0 create a L2 with
  H_GUEST_CREATE() and receives a handle to use in future hcalls

- L1 requests the L0 create a L2 vCPU with
  H_GUEST_CREATE_VCPU()

- L1 sets up the L2 using H_GUEST_SET and the
  H_GUEST_VCPU_RUN input buffer

- L1 requests the L0 runs the L2 vCPU using H_GUEST_VCPU_RUN()

- L2 returns to L1 with an exit reason and L1 reads the
  H_GUEST_VCPU_RUN output buffer populated by the L0

- L1 handles the exit using H_GET_STATE if necessary

- L1 reruns L2 vCPU with H_GUEST_VCPU_RUN

- L1 frees the L2 in the L0 with H_GUEST_DELETE()

Support for the new API is determined by trying
H_GUEST_GET_CAPABILITIES. On a successful return, the new API will then
be used.

Use the vcpu register state setters for tracking modified guest state
elements and copy the thread wide values into the H_GUEST_VCPU_RUN input
buffer immediately before running a L2. The guest wide
elements can not be added to the input buffer so send them with a
separate H_GUEST_SET call if necessary.

Make the vcpu register getter load the corresponding value from the real
host with H_GUEST_GET. To avoid unnecessarily calling H_GUEST_GET, track
which values have already been loaded between H_GUEST_VCPU_RUN calls. If
an element is present in the H_GUEST_VCPU_RUN output buffer it also does
not need to be loaded again.

There is existing support for running nested guests on KVM
with powernv. However the interface used for this is not supported by
other PAPR hosts. This existing API is still supported.

Signed-off-by: Jordan Niethe 
---
v2:
  - Declare op structs as static
  - Use expressions in switch case with local variables
  - Do not use the PVR for the LOGICAL PVR ID
  - Handle emul_inst as now a double word
  - Use new GPR(), etc macros
  - Determine PAPR nested capabilities from cpu features
---
 arch/powerpc/include/asm/guest-state-buffer.h | 105 +-
 arch/powerpc/include/asm/hvcall.h |  30 +
 arch/powerpc/include/asm/kvm_book3s.h | 122 ++-
 arch/powerpc/include/asm/kvm_book3s_64.h  |   6 +
 arch/powerpc/include/asm/kvm_host.h   |  21 +
 arch/powerpc/include/asm/kvm_ppc.h|  64 +-
 arch/powerpc/include/asm/plpar_wrappers.h | 198 
 arch/powerpc/kvm/Makefile |   1 +
 arch/powerpc/kvm/book3s_hv.c  | 126 ++-
 arch/powerpc/kvm/book3s_hv.h  |  74 +-
 arch/powerpc/kvm/book3s_hv_nested.c   |  38 +-
 arch/powerpc/kvm/book3s_hv_papr.c | 940 ++
 arch/powerpc/kvm/emulate_loadstore.c  |   4 +-
 arch/powerpc/kvm/guest-state-buffer.c |  49 +
 14 files changed, 1684 insertions(+), 94 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_papr.c

diff --git a/arch/powerpc/include/asm/guest-state-buffer.h 
b/arch/powerpc/include/asm/guest-state-buffer.h
index 65a840abf1bb..116126edd8e2 100644
--- a/arch/powerpc/include/asm/guest-state-buffer.h
+++ b/arch/powerpc/include/asm/guest-state-buffer.h
@@ -5,6 +5,7 @@
 #ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H
 #define _ASM_POWERPC_GUEST_STATE_BUFFER_H
 
+#include "asm/hvcall.h"
 #include 
 #include 
 #include 
@@ -14,16 +15,16 @@
  **/
 #define GSID_BLANK 0x
 
-#define GSID_HOST_STATE_SIZE   0x0001 /* Size of Hypervisor Internal 
Format VCPU state */
-#define GSID_RUN_OUTPUT_MIN_SIZE   0x0002 /* Minimum size of the Run VCPU 
output buffer */
-#define GSID_LOGICAL_PVR   0x0003 /* Logical PVR */
-#define GSID_TB_OFFSET 0x0004 /* Timebase Offset */
-#define GSID_PARTITION_TABLE   0x0005 /* Partition Scoped Page Table */
-#define GSID_PROCESS_TABLE 0x0006 /* Process Table */
+#define GSID_HOST_STATE_SIZE   0x0001
+#define GSID_RUN_OUTPUT_MIN_SIZE   0x0002
+#define GSID_LOGICAL_PVR   0x0003
+#define GSID_TB_OFFSET 0x0004
+#define GSID_PARTITION_TABLE   0x0005
+#define GSID_PROCESS_TABLE 0x0006
 
-#define GSID_RUN_INPUT 0x0C00 /* Run VCPU Input Buffer */
-#define GSID_RUN_OUTPUT0x0C01 /* Run VCPU Out Buffer */
-#define GSID_VPA   0x0C02 /* HRA to Guest VCPU VPA */
+#define GSID_RUN_INPUT 0x0C00
+#define GSID_RUN_OUTPUT0x0C01
+#define GSID_VPA   0x0C02
 
 #define GSID_GPR(x)(0x1000 + (x))
 #define GSID_HDEC_EXPIRY_TB0x1020
@@ -300,6 +301,8 @@ struct gs_buff *gsb_new(size_t size, unsigned long guest_id,
unsigned long vcpu_id, gfp_t