Re: [RFC PATCH v2 5/6] KVM: PPC: Add support for nested PAPR guests
On Wed, Jun 7, 2023 at 7:09 PM Nicholas Piggin wrote: [snip] > > You lost your comments. Thanks > > > diff --git a/arch/powerpc/include/asm/kvm_book3s.h > > b/arch/powerpc/include/asm/kvm_book3s.h > > index 0ca2d8b37b42..c5c57552b447 100644 > > --- a/arch/powerpc/include/asm/kvm_book3s.h > > +++ b/arch/powerpc/include/asm/kvm_book3s.h > > @@ -12,6 +12,7 @@ > > #include > > #include > > #include > > +#include > > > > struct kvmppc_bat { > > u64 raw; > > @@ -316,6 +317,57 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu > > *vcpu); > > > > void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); > > > > + > > +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > > + > > +extern bool __kvmhv_on_papr; > > + > > +static inline bool kvmhv_on_papr(void) > > +{ > > + return __kvmhv_on_papr; > > +} > > It's a nitpick, but kvmhv_on_pseries() is because we're runnning KVM-HV > on a pseries guest kernel. Which is a papr guest kernel. So this kind of > doesn't make sense if you read it the same way. > > kvmhv_nested_using_papr() or something like that might read a bit > better. Will we go with kvmhv_using_nested_v2()? > > This could be a static key too. Will do. > > > @@ -575,6 +593,7 @@ struct kvm_vcpu_arch { > > ulong dscr; > > ulong amr; > > ulong uamor; > > + ulong amor; > > ulong iamr; > > u32 ctrl; > > u32 dabrx; > > This belongs somewhere else. It can be dropped. > > > @@ -829,6 +848,8 @@ struct kvm_vcpu_arch { > > u64 nested_hfscr; /* HFSCR that the L1 requested for the nested > > guest */ > > u32 nested_vcpu_id; > > gpa_t nested_io_gpr; > > + /* For nested APIv2 guests*/ > > + struct kvmhv_papr_host papr_host; > > #endif > > This is not exactly a papr host. Might have to come up with a better > name especially if we implement a L0 things could get confusing. Any name ideas? nestedv2_state? > > > @@ -342,6 +343,203 @@ static inline long > > plpar_get_cpu_characteristics(struct h_cpu_char_result *p) > > return rc; > > } > > > > +static inline long plpar_guest_create(unsigned long flags, unsigned long > > *guest_id) > > +{ > > + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; > > + unsigned long token; > > + long rc; > > + > > + token = -1UL; > > + while (true) { > > + rc = plpar_hcall(H_GUEST_CREATE, retbuf, flags, token); > > + if (rc == H_SUCCESS) { > > + *guest_id = retbuf[0]; > > + break; > > + } > > + > > + if (rc == H_BUSY) { > > + token = retbuf[0]; > > + cpu_relax(); > > + continue; > > + } > > + > > + if (H_IS_LONG_BUSY(rc)) { > > + token = retbuf[0]; > > + mdelay(get_longbusy_msecs(rc)); > > All of these things need a non-sleeping delay? Can we sleep instead? > Or if not, might have to think about going back to the caller and it > can retry. > > get/set state might be a bit inconvenient, although I don't expect > that should potentially take so long as guest and vcpu create/delete, > so at least those ones would be good if they're called while > preemptable. Yeah no reason not to sleep except for get/set, let me try it out. > > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > > index 521d84621422..f22ee582e209 100644 > > --- a/arch/powerpc/kvm/book3s_hv.c > > +++ b/arch/powerpc/kvm/book3s_hv.c > > @@ -383,6 +383,11 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu > > *vcpu) > > spin_unlock_irqrestore(>arch.tbacct_lock, flags); > > } > > > > +static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) > > +{ > > + vcpu->arch.pvr = pvr; > > +} > > Didn't you lose this in a previous patch? I thought it must have moved > to a header but it reappears. Yes, that was meant to stay put. > > > + > > /* Dummy value used in computing PCR value below */ > > #define PCR_ARCH_31(PCR_ARCH_300 << 1) > > > > @@ -1262,13 +1267,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > > return RESUME_HOST; > > break; > > #endif > > - case H_RANDOM: > > + case H_RANDOM: { > > unsigned long rand; > > > > if (!arch_get_random_seed_longs(, 1)) > > ret = H_HARDWARE; > > kvmppc_set_gpr(vcpu, 4, rand); > > break; > > + } > > case H_RPT_INVALIDATE: > > ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4), > > kvmppc_get_gpr(vcpu, 5), > > Compile fix for a previous patch. Thanks. > > > @@ -2921,14 +2927,21 @@ static int kvmppc_core_vcpu_create_hv(struct > > kvm_vcpu *vcpu) > > vcpu->arch.shared_big_endian = false; > > #endif > > #endif > > - kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC); > > > > + if (kvmhv_on_papr()) { > > +
Re: [RFC PATCH v2 5/6] KVM: PPC: Add support for nested PAPR guests
On Mon Jun 5, 2023 at 4:48 PM AEST, Jordan Niethe wrote: > A series of hcalls have been added to the PAPR which allow a regular > guest partition to create and manage guest partitions of its own. Add > support to KVM to utilize these hcalls to enable running nested guests. > > Overview of the new hcall usage: > > - L1 and L0 negotiate capabilities with > H_GUEST_{G,S}ET_CAPABILITIES() > > - L1 requests the L0 create a L2 with > H_GUEST_CREATE() and receives a handle to use in future hcalls > > - L1 requests the L0 create a L2 vCPU with > H_GUEST_CREATE_VCPU() > > - L1 sets up the L2 using H_GUEST_SET and the > H_GUEST_VCPU_RUN input buffer > > - L1 requests the L0 runs the L2 vCPU using H_GUEST_VCPU_RUN() > > - L2 returns to L1 with an exit reason and L1 reads the > H_GUEST_VCPU_RUN output buffer populated by the L0 > > - L1 handles the exit using H_GET_STATE if necessary > > - L1 reruns L2 vCPU with H_GUEST_VCPU_RUN > > - L1 frees the L2 in the L0 with H_GUEST_DELETE() > > Support for the new API is determined by trying > H_GUEST_GET_CAPABILITIES. On a successful return, the new API will then > be used. > > Use the vcpu register state setters for tracking modified guest state > elements and copy the thread wide values into the H_GUEST_VCPU_RUN input > buffer immediately before running a L2. The guest wide > elements can not be added to the input buffer so send them with a > separate H_GUEST_SET call if necessary. > > Make the vcpu register getter load the corresponding value from the real > host with H_GUEST_GET. To avoid unnecessarily calling H_GUEST_GET, track > which values have already been loaded between H_GUEST_VCPU_RUN calls. If > an element is present in the H_GUEST_VCPU_RUN output buffer it also does > not need to be loaded again. > > There is existing support for running nested guests on KVM > with powernv. However the interface used for this is not supported by > other PAPR hosts. This existing API is still supported. > > Signed-off-by: Jordan Niethe > --- > v2: > - Declare op structs as static > - Use expressions in switch case with local variables > - Do not use the PVR for the LOGICAL PVR ID > - Handle emul_inst as now a double word > - Use new GPR(), etc macros > - Determine PAPR nested capabilities from cpu features > --- > arch/powerpc/include/asm/guest-state-buffer.h | 105 +- > arch/powerpc/include/asm/hvcall.h | 30 + > arch/powerpc/include/asm/kvm_book3s.h | 122 ++- > arch/powerpc/include/asm/kvm_book3s_64.h | 6 + > arch/powerpc/include/asm/kvm_host.h | 21 + > arch/powerpc/include/asm/kvm_ppc.h| 64 +- > arch/powerpc/include/asm/plpar_wrappers.h | 198 > arch/powerpc/kvm/Makefile | 1 + > arch/powerpc/kvm/book3s_hv.c | 126 ++- > arch/powerpc/kvm/book3s_hv.h | 74 +- > arch/powerpc/kvm/book3s_hv_nested.c | 38 +- > arch/powerpc/kvm/book3s_hv_papr.c | 940 ++ > arch/powerpc/kvm/emulate_loadstore.c | 4 +- > arch/powerpc/kvm/guest-state-buffer.c | 49 + > 14 files changed, 1684 insertions(+), 94 deletions(-) > create mode 100644 arch/powerpc/kvm/book3s_hv_papr.c > > diff --git a/arch/powerpc/include/asm/guest-state-buffer.h > b/arch/powerpc/include/asm/guest-state-buffer.h > index 65a840abf1bb..116126edd8e2 100644 > --- a/arch/powerpc/include/asm/guest-state-buffer.h > +++ b/arch/powerpc/include/asm/guest-state-buffer.h > @@ -5,6 +5,7 @@ > #ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H > #define _ASM_POWERPC_GUEST_STATE_BUFFER_H > > +#include "asm/hvcall.h" > #include > #include > #include > @@ -14,16 +15,16 @@ > **/ > #define GSID_BLANK 0x > > -#define GSID_HOST_STATE_SIZE 0x0001 /* Size of Hypervisor Internal > Format VCPU state */ > -#define GSID_RUN_OUTPUT_MIN_SIZE 0x0002 /* Minimum size of the Run VCPU > output buffer */ > -#define GSID_LOGICAL_PVR 0x0003 /* Logical PVR */ > -#define GSID_TB_OFFSET 0x0004 /* Timebase Offset */ > -#define GSID_PARTITION_TABLE 0x0005 /* Partition Scoped Page Table */ > -#define GSID_PROCESS_TABLE 0x0006 /* Process Table */ > +#define GSID_HOST_STATE_SIZE 0x0001 > +#define GSID_RUN_OUTPUT_MIN_SIZE 0x0002 > +#define GSID_LOGICAL_PVR 0x0003 > +#define GSID_TB_OFFSET 0x0004 > +#define GSID_PARTITION_TABLE 0x0005 > +#define GSID_PROCESS_TABLE 0x0006 You lost your comments. > diff --git a/arch/powerpc/include/asm/kvm_book3s.h > b/arch/powerpc/include/asm/kvm_book3s.h > index 0ca2d8b37b42..c5c57552b447 100644 > --- a/arch/powerpc/include/asm/kvm_book3s.h > +++ b/arch/powerpc/include/asm/kvm_book3s.h > @@ -12,6 +12,7 @@ > #include > #include > #include > +#include > > struct kvmppc_bat { > u64 raw; > @@