Re: [PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-13 Thread Paul Mackerras
On Wed, Nov 13, 2019 at 08:59:08PM +0530, Bharata B Rao wrote:
> On Tue, Nov 12, 2019 at 04:34:34PM +1100, Paul Mackerras wrote:
> > On Mon, Nov 04, 2019 at 09:47:58AM +0530, Bharata B Rao wrote:
> > [snip]
> > > @@ -5442,6 +5471,64 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu 
> > > *vcpu, ulong *eaddr, void *ptr,
> > >   return rc;
> > >  }
> > >  
> > > +/*
> > > + *  IOCTL handler to turn off secure mode of guest
> > > + *
> > > + * - Issue ucall to terminate the guest on the UV side
> > > + * - Unpin the VPA pages (Enables these pages to be migrated back
> > > + *   when VM becomes secure again)
> > > + * - Recreate partition table as the guest is transitioning back to
> > > + *   normal mode
> > > + * - Release all device pages
> > > + */
> > > +static int kvmhv_svm_off(struct kvm *kvm)
> > > +{
> > > + struct kvm_vcpu *vcpu;
> > > + int srcu_idx;
> > > + int ret = 0;
> > > + int i;
> > > +
> > > + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
> > > + return ret;
> > > +
> > 
> > A further comment on this code: it should check that no vcpus are
> > running and fail if any are running, and it should prevent any vcpus
> > from running until the function is finished, using code like that in
> > kvmhv_configure_mmu().  That is, it should do something like this:
> > 
> > mutex_lock(&kvm->arch.mmu_setup_lock);
> > mmu_was_ready = kvm->arch.mmu_ready;
> > if (kvm->arch.mmu_ready) {
> > kvm->arch.mmu_ready = 0;
> > /* order mmu_ready vs. vcpus_running */
> > smp_mb();
> > if (atomic_read(&kvm->arch.vcpus_running)) {
> > kvm->arch.mmu_ready = 1;
> > ret = -EBUSY;
> > goto out_unlock;
> > }
> > }
> > 
> > and then after clearing kvm->arch.secure_guest below:
> > 
> > > + srcu_idx = srcu_read_lock(&kvm->srcu);
> > > + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> > > + struct kvm_memory_slot *memslot;
> > > + struct kvm_memslots *slots = __kvm_memslots(kvm, i);
> > > +
> > > + if (!slots)
> > > + continue;
> > > +
> > > + kvm_for_each_memslot(memslot, slots) {
> > > + kvmppc_uvmem_drop_pages(memslot, kvm, true);
> > > + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
> > > + }
> > > + }
> > > + srcu_read_unlock(&kvm->srcu, srcu_idx);
> > > +
> > > + ret = uv_svm_terminate(kvm->arch.lpid);
> > > + if (ret != U_SUCCESS) {
> > > + ret = -EINVAL;
> > > + goto out;
> > > + }
> > > +
> > > + kvm_for_each_vcpu(i, vcpu, kvm) {
> > > + spin_lock(&vcpu->arch.vpa_update_lock);
> > > + unpin_vpa_reset(kvm, &vcpu->arch.dtl);
> > > + unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
> > > + unpin_vpa_reset(kvm, &vcpu->arch.vpa);
> > > + spin_unlock(&vcpu->arch.vpa_update_lock);
> > > + }
> > > +
> > > + ret = kvmppc_reinit_partition_table(kvm);
> > > + if (ret)
> > > + goto out;
> > > +
> > > + kvm->arch.secure_guest = 0;
> > 
> > you need to do:
> > 
> > kvm->arch.mmu_ready = mmu_was_ready;
> >  out_unlock:
> > mutex_unlock(&kvm->arch.mmu_setup_lock);
> > 
> > > +out:
> > > + return ret;
> > > +}
> > > +
> > 
> > With that extra check in place, it should be safe to unpin the vpas if
> > there is a good reason to do so.  ("Userspace has some bug that we
> > haven't found" isn't a good reason to do so.)
> 
> QEMU indeed does set_one_reg to reset the VPAs but that only marks
> the VPA update as pending. The actual unpinning happens when vcpu
> gets to run after reset at which time the VPAs are updated after
> any unpinning (if required)
> 
> When secure guest reboots, vcpu 0 gets to run and does unpin its
> VPA pages and then proceeds with switching to secure. Here UV
> tries to page-in all the guest pages, including the still pinned
> VPA pages corresponding to other vcpus which haven't had a chance
> to run till now. They are all still pinned and hence page-in fails.
> 
> To prevent this, we have to explicitly unpin the VPA pages during
> this svm off ioctl. This will ensure that SMP secure guest is able
> to reboot correctly.

OK, that makes sense.  Please put a comment in the code explaining
this briefly.

> So I will incorporate the code chunk you have shown above to fail
> if any vcpu is running and prevent any vcpu from running when
> we unpin VPAs from this ioctl.

Sounds good.

Paul.


Re: [PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-13 Thread Bharata B Rao
On Tue, Nov 12, 2019 at 04:34:34PM +1100, Paul Mackerras wrote:
> On Mon, Nov 04, 2019 at 09:47:58AM +0530, Bharata B Rao wrote:
> [snip]
> > @@ -5442,6 +5471,64 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu 
> > *vcpu, ulong *eaddr, void *ptr,
> > return rc;
> >  }
> >  
> > +/*
> > + *  IOCTL handler to turn off secure mode of guest
> > + *
> > + * - Issue ucall to terminate the guest on the UV side
> > + * - Unpin the VPA pages (Enables these pages to be migrated back
> > + *   when VM becomes secure again)
> > + * - Recreate partition table as the guest is transitioning back to
> > + *   normal mode
> > + * - Release all device pages
> > + */
> > +static int kvmhv_svm_off(struct kvm *kvm)
> > +{
> > +   struct kvm_vcpu *vcpu;
> > +   int srcu_idx;
> > +   int ret = 0;
> > +   int i;
> > +
> > +   if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
> > +   return ret;
> > +
> 
> A further comment on this code: it should check that no vcpus are
> running and fail if any are running, and it should prevent any vcpus
> from running until the function is finished, using code like that in
> kvmhv_configure_mmu().  That is, it should do something like this:
> 
>   mutex_lock(&kvm->arch.mmu_setup_lock);
>   mmu_was_ready = kvm->arch.mmu_ready;
>   if (kvm->arch.mmu_ready) {
>   kvm->arch.mmu_ready = 0;
>   /* order mmu_ready vs. vcpus_running */
>   smp_mb();
>   if (atomic_read(&kvm->arch.vcpus_running)) {
>   kvm->arch.mmu_ready = 1;
>   ret = -EBUSY;
>   goto out_unlock;
>   }
>   }
> 
> and then after clearing kvm->arch.secure_guest below:
> 
> > +   srcu_idx = srcu_read_lock(&kvm->srcu);
> > +   for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> > +   struct kvm_memory_slot *memslot;
> > +   struct kvm_memslots *slots = __kvm_memslots(kvm, i);
> > +
> > +   if (!slots)
> > +   continue;
> > +
> > +   kvm_for_each_memslot(memslot, slots) {
> > +   kvmppc_uvmem_drop_pages(memslot, kvm, true);
> > +   uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
> > +   }
> > +   }
> > +   srcu_read_unlock(&kvm->srcu, srcu_idx);
> > +
> > +   ret = uv_svm_terminate(kvm->arch.lpid);
> > +   if (ret != U_SUCCESS) {
> > +   ret = -EINVAL;
> > +   goto out;
> > +   }
> > +
> > +   kvm_for_each_vcpu(i, vcpu, kvm) {
> > +   spin_lock(&vcpu->arch.vpa_update_lock);
> > +   unpin_vpa_reset(kvm, &vcpu->arch.dtl);
> > +   unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
> > +   unpin_vpa_reset(kvm, &vcpu->arch.vpa);
> > +   spin_unlock(&vcpu->arch.vpa_update_lock);
> > +   }
> > +
> > +   ret = kvmppc_reinit_partition_table(kvm);
> > +   if (ret)
> > +   goto out;
> > +
> > +   kvm->arch.secure_guest = 0;
> 
> you need to do:
> 
>   kvm->arch.mmu_ready = mmu_was_ready;
>  out_unlock:
>   mutex_unlock(&kvm->arch.mmu_setup_lock);
> 
> > +out:
> > +   return ret;
> > +}
> > +
> 
> With that extra check in place, it should be safe to unpin the vpas if
> there is a good reason to do so.  ("Userspace has some bug that we
> haven't found" isn't a good reason to do so.)

QEMU indeed does set_one_reg to reset the VPAs but that only marks
the VPA update as pending. The actual unpinning happens when vcpu
gets to run after reset at which time the VPAs are updated after
any unpinning (if required)

When secure guest reboots, vcpu 0 gets to run and does unpin its
VPA pages and then proceeds with switching to secure. Here UV
tries to page-in all the guest pages, including the still pinned
VPA pages corresponding to other vcpus which haven't had a chance
to run till now. They are all still pinned and hence page-in fails.

To prevent this, we have to explicitly unpin the VPA pages during
this svm off ioctl. This will ensure that SMP secure guest is able
to reboot correctly.

So I will incorporate the code chunk you have shown above to fail
if any vcpu is running and prevent any vcpu from running when
we unpin VPAs from this ioctl.

Regards,
Bharata.



Re: [PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-11 Thread Paul Mackerras
On Mon, Nov 04, 2019 at 09:47:58AM +0530, Bharata B Rao wrote:
[snip]
> @@ -5442,6 +5471,64 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, 
> ulong *eaddr, void *ptr,
>   return rc;
>  }
>  
> +/*
> + *  IOCTL handler to turn off secure mode of guest
> + *
> + * - Issue ucall to terminate the guest on the UV side
> + * - Unpin the VPA pages (Enables these pages to be migrated back
> + *   when VM becomes secure again)
> + * - Recreate partition table as the guest is transitioning back to
> + *   normal mode
> + * - Release all device pages
> + */
> +static int kvmhv_svm_off(struct kvm *kvm)
> +{
> + struct kvm_vcpu *vcpu;
> + int srcu_idx;
> + int ret = 0;
> + int i;
> +
> + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
> + return ret;
> +

A further comment on this code: it should check that no vcpus are
running and fail if any are running, and it should prevent any vcpus
from running until the function is finished, using code like that in
kvmhv_configure_mmu().  That is, it should do something like this:

mutex_lock(&kvm->arch.mmu_setup_lock);
mmu_was_ready = kvm->arch.mmu_ready;
if (kvm->arch.mmu_ready) {
kvm->arch.mmu_ready = 0;
/* order mmu_ready vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
kvm->arch.mmu_ready = 1;
ret = -EBUSY;
goto out_unlock;
}
}

and then after clearing kvm->arch.secure_guest below:

> + srcu_idx = srcu_read_lock(&kvm->srcu);
> + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> + struct kvm_memory_slot *memslot;
> + struct kvm_memslots *slots = __kvm_memslots(kvm, i);
> +
> + if (!slots)
> + continue;
> +
> + kvm_for_each_memslot(memslot, slots) {
> + kvmppc_uvmem_drop_pages(memslot, kvm, true);
> + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
> + }
> + }
> + srcu_read_unlock(&kvm->srcu, srcu_idx);
> +
> + ret = uv_svm_terminate(kvm->arch.lpid);
> + if (ret != U_SUCCESS) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + spin_lock(&vcpu->arch.vpa_update_lock);
> + unpin_vpa_reset(kvm, &vcpu->arch.dtl);
> + unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
> + unpin_vpa_reset(kvm, &vcpu->arch.vpa);
> + spin_unlock(&vcpu->arch.vpa_update_lock);
> + }
> +
> + ret = kvmppc_reinit_partition_table(kvm);
> + if (ret)
> + goto out;
> +
> + kvm->arch.secure_guest = 0;

you need to do:

kvm->arch.mmu_ready = mmu_was_ready;
 out_unlock:
mutex_unlock(&kvm->arch.mmu_setup_lock);

> +out:
> + return ret;
> +}
> +

With that extra check in place, it should be safe to unpin the vpas if
there is a good reason to do so.  ("Userspace has some bug that we
haven't found" isn't a good reason to do so.)

Paul.



Re: [PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-10 Thread Bharata B Rao
On Mon, Nov 11, 2019 at 04:28:06PM +1100, Paul Mackerras wrote:
> On Mon, Nov 04, 2019 at 09:47:58AM +0530, Bharata B Rao wrote:
> > Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
> > This ioctl will be issued by QEMU during reset and includes
> > the following steps:
> > 
> > - Ask UV to terminate the guest via UV_SVM_TERMINATE ucall
> > - Unpin the VPA pages so that they can be migrated back to secure
> >   side when guest becomes secure again. This is required because
> >   pinned pages can't be migrated.
> 
> Unpinning the VPA pages is normally handled during VM reset by QEMU
> doing set_one_reg operations to set the values for the
> KVM_REG_PPC_VPA_ADDR, KVM_REG_PPC_VPA_SLB and KVM_REG_PPC_VPA_DTL
> pseudo-registers to zero.  Is there some reason why this isn't
> happening for a secure VM, and if so, what is that reason?
> If it is happening, then why do we need to unpin the pages explicitly
> here?

We were observing these VPA pages still remaining pinned during
reset and hence subsequent paging-in of these pages were failing.
Unpinning them fixed the problem.

I will investigate and get back on why exactly these pages weren't
getting unpinned normally as part of reset.

> 
> > - Reinitialize guest's partitioned scoped page tables. These are
> >   freed when guest becomes secure (H_SVM_INIT_DONE)
> 
> It doesn't seem particularly useful to me to free the partition-scoped
> page tables when the guest becomes secure, and it feels like it makes
> things more fragile.  If you don't free them then, then you don't need
> to reallocate them now.

Sure, I will not free them in the next version.

Regards,
Bharata.



Re: [PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-10 Thread Paul Mackerras
On Mon, Nov 04, 2019 at 09:47:58AM +0530, Bharata B Rao wrote:
> Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
> This ioctl will be issued by QEMU during reset and includes
> the following steps:
> 
> - Ask UV to terminate the guest via UV_SVM_TERMINATE ucall
> - Unpin the VPA pages so that they can be migrated back to secure
>   side when guest becomes secure again. This is required because
>   pinned pages can't be migrated.

Unpinning the VPA pages is normally handled during VM reset by QEMU
doing set_one_reg operations to set the values for the
KVM_REG_PPC_VPA_ADDR, KVM_REG_PPC_VPA_SLB and KVM_REG_PPC_VPA_DTL
pseudo-registers to zero.  Is there some reason why this isn't
happening for a secure VM, and if so, what is that reason?
If it is happening, then why do we need to unpin the pages explicitly
here?

> - Reinitialize guest's partitioned scoped page tables. These are
>   freed when guest becomes secure (H_SVM_INIT_DONE)

It doesn't seem particularly useful to me to free the partition-scoped
page tables when the guest becomes secure, and it feels like it makes
things more fragile.  If you don't free them then, then you don't need
to reallocate them now.

> - Release all device pages of the secure guest.
> 
> After these steps, guest is ready to issue UV_ESM call once again
> to switch to secure mode.

Paul.


[PATCH v10 6/8] KVM: PPC: Support reset of secure guest

2019-11-03 Thread Bharata B Rao
Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
This ioctl will be issued by QEMU during reset and includes
the following steps:

- Ask UV to terminate the guest via UV_SVM_TERMINATE ucall
- Unpin the VPA pages so that they can be migrated back to secure
  side when guest becomes secure again. This is required because
  pinned pages can't be migrated.
- Reinitialize guest's partitioned scoped page tables. These are
  freed when guest becomes secure (H_SVM_INIT_DONE)
- Release all device pages of the secure guest.

After these steps, guest is ready to issue UV_ESM call once again
to switch to secure mode.

Signed-off-by: Bharata B Rao 
Signed-off-by: Sukadev Bhattiprolu 
[Implementation of uv_svm_terminate() and its call from
guest shutdown path]
Signed-off-by: Ram Pai 
[Unpinning of VPA pages]
---
 Documentation/virt/kvm/api.txt| 19 +
 arch/powerpc/include/asm/kvm_ppc.h|  2 +
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  5 ++
 arch/powerpc/kvm/book3s_hv.c  | 88 +++
 arch/powerpc/kvm/book3s_hv_uvmem.c|  6 ++
 arch/powerpc/kvm/powerpc.c| 12 
 include/uapi/linux/kvm.h  |  1 +
 8 files changed, 134 insertions(+)

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..1b2e1d2002ba 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -4126,6 +4126,25 @@ Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
+4.121 KVM_PPC_SVM_OFF
+
+Capability: basic
+Architectures: powerpc
+Type: vm ioctl
+Parameters: none
+Returns: 0 on successful completion,
+Errors:
+  EINVAL:if ultravisor failed to terminate the secure guest
+  ENOMEM:if hypervisor failed to allocate new radix page tables for guest
+
+This ioctl is used to turn off the secure mode of the guest or transition
+the guest from secure mode to normal mode. This is invoked when the guest
+is reset. This has no effect if called for a normal guest.
+
+This ioctl issues an ultravisor call to terminate the secure guest,
+unpins the VPA pages, reinitializes guest's partition scoped page
+tables and releases all the device pages that are used to track the
+secure pages by hypervisor.
 
 5. The kvm_run structure
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index ee62776e5433..6d1bb597fe95 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -177,6 +177,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm 
*kvm,
 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
 extern void kvmppc_setup_partition_table(struct kvm *kvm);
+int kvmppc_reinit_partition_table(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
@@ -321,6 +322,7 @@ struct kvmppc_ops {
   int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
  int size);
+   int (*svm_off)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 4b0d044caa2a..b66f6db7be6c 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -34,5 +34,6 @@
 #define UV_UNSHARE_PAGE0xF134
 #define UV_UNSHARE_ALL_PAGES   0xF140
 #define UV_PAGE_INVAL  0xF138
+#define UV_SVM_TERMINATE   0xF13C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index b8e59b7b4ac8..790b0e63681f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -77,4 +77,9 @@ static inline int uv_page_inval(u64 lpid, u64 gpa, u64 
page_shift)
return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
 }
 
+static inline int uv_svm_terminate(u64 lpid)
+{
+   return ucall_norets(UV_SVM_TERMINATE, lpid);
+}
+
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cb7ae1e9e4f2..d2bc4e9bbe7e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2443,6 +2443,15 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa 
*vpa)
vpa->dirty);
 }
 
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+   unpin_vpa(kvm, vpa);
+   vpa->gpa = 0;
+   vpa->pinned_addr = NULL;
+   vpa->dirty = false;
+   vpa->update_pending = 0;
+}
+
 static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu