Confidential guests need to generate a new KVM file descriptor upon virtual machine reset. Existing VCPUs need to be reattached to this new KVM VM file descriptor. As part of this, new VCPU file descriptors against this new KVM VM file descriptor need to be created and re-initialized. Resources allocated against the old VCPU fds need to be released. This change makes this happen.
Signed-off-by: Ani Sinha <[email protected]> --- accel/kvm/kvm-all.c | 201 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 166 insertions(+), 35 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 5b854c9866..638f193626 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -130,6 +130,12 @@ static NotifierWithReturnList register_vmfd_changed_notifiers = static NotifierWithReturnList register_vmfd_pre_change_notifiers = NOTIFIER_WITH_RETURN_LIST_INITIALIZER(register_vmfd_pre_change_notifiers); +static int kvm_rebind_vcpus(Error **errp); + +static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp); +static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp); +static int vcpu_unmap_regions(KVMState *s, CPUState *cpu); + struct KVMResampleFd { int gsi; EventNotifier *resample_event; @@ -423,6 +429,82 @@ err: return ret; } +static int kvm_rebind_vcpus(Error **errp) +{ + CPUState *cpu; + unsigned long vcpu_id; + KVMState *s = kvm_state; + int kvm_fd, ret = 0; + + CPU_FOREACH(cpu) { + vcpu_id = kvm_arch_vcpu_id(cpu); + + if (cpu->kvm_fd) { + close(cpu->kvm_fd); + } + + ret = kvm_arch_destroy_vcpu(cpu); + if (ret < 0) { + goto err; + } + + if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) { + s->coalesced_mmio_ring = NULL; + } + + ret = vcpu_unmap_regions(s, cpu); + if (ret < 0) { + goto err; + } + + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); + if (kvm_fd < 0) { + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu (%s)", + vcpu_id, strerror(kvm_fd)); + return kvm_fd; + } + + cpu->kvm_fd = kvm_fd; + + cpu->vcpu_dirty = false; + cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; + + ret = map_kvm_run(s, cpu, errp); + if (ret < 0) { + goto err; + } + + if (s->kvm_dirty_ring_size) { + ret = map_kvm_dirty_gfns(s, cpu, errp); + if (ret < 0) { + goto err; + } + } + + ret = kvm_arch_init_vcpu(cpu); + if (ret 
< 0) { + error_setg_errno(errp, -ret, + "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", + vcpu_id); + } + + close(cpu->kvm_vcpu_stats_fd); + cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); + kvm_init_cpu_signals(cpu); + + kvm_cpu_synchronize_post_init(cpu); + } + + err: + return ret; +} + static void kvm_park_vcpu(CPUState *cpu) { struct KVMParkedVcpu *vcpu; @@ -511,19 +593,11 @@ int kvm_create_and_park_vcpu(CPUState *cpu) return ret; } -static int do_kvm_destroy_vcpu(CPUState *cpu) +static int vcpu_unmap_regions(KVMState *s, CPUState *cpu) { - KVMState *s = kvm_state; int mmap_size; int ret = 0; - trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); - - ret = kvm_arch_destroy_vcpu(cpu); - if (ret < 0) { - goto err; - } - mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { ret = mmap_size; @@ -551,39 +625,47 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) cpu->kvm_dirty_gfns = NULL; } - kvm_park_vcpu(cpu); -err: + err: return ret; } -void kvm_destroy_vcpu(CPUState *cpu) -{ - if (do_kvm_destroy_vcpu(cpu) < 0) { - error_report("kvm_destroy_vcpu failed"); - exit(EXIT_FAILURE); - } -} - -int kvm_init_vcpu(CPUState *cpu, Error **errp) +static int do_kvm_destroy_vcpu(CPUState *cpu) { KVMState *s = kvm_state; - int mmap_size; - int ret; + int ret = 0; - trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); - ret = kvm_arch_pre_create_vcpu(cpu, errp); + ret = kvm_arch_destroy_vcpu(cpu); if (ret < 0) { goto err; } - ret = kvm_create_vcpu(cpu); + /* If I am the CPU that created coalesced_mmio_ring, then discard it */ + if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) { + s->coalesced_mmio_ring = NULL; + } + + ret = vcpu_unmap_regions(s, cpu); if (ret < 0) { - error_setg_errno(errp, -ret, - "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", - kvm_arch_vcpu_id(cpu)); goto err; } + kvm_park_vcpu(cpu); +err: + return ret; +} + +void 
kvm_destroy_vcpu(CPUState *cpu) +{ + if (do_kvm_destroy_vcpu(cpu) < 0) { + error_report("kvm_destroy_vcpu failed"); + exit(EXIT_FAILURE); + } +} + +static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp) +{ + int mmap_size, ret = 0; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -608,14 +690,53 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE; } + err: + return ret; +} + +static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp) +{ + int ret = 0; + /* Use MAP_SHARED to share pages with the kernel */ + cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes, + PROT_READ | PROT_WRITE, MAP_SHARED, + cpu->kvm_fd, + PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET); + if (cpu->kvm_dirty_gfns == MAP_FAILED) { + ret = -errno; + } + + return ret; +} + +int kvm_init_vcpu(CPUState *cpu, Error **errp) +{ + KVMState *s = kvm_state; + int ret; + + trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + + ret = kvm_create_vcpu(cpu); + if (ret < 0) { + error_setg_errno(errp, -ret, + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + goto err; + } + + ret = map_kvm_run(s, cpu, errp); + if (ret < 0) { + goto err; + } + if (s->kvm_dirty_ring_size) { - /* Use MAP_SHARED to share pages with the kernel */ - cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes, - PROT_READ | PROT_WRITE, MAP_SHARED, - cpu->kvm_fd, - PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET); - if (cpu->kvm_dirty_gfns == MAP_FAILED) { - ret = -errno; + ret = map_kvm_dirty_gfns(s, cpu, errp); + if (ret < 0) { goto err; } } @@ -2716,6 +2837,16 @@ static int kvm_reset_vmfd(MachineState *ms) } assert(!err); + /* + * rebind new vcpu fds with the new kvm fds + * These can only be called after kvm_arch_vmfd_change_ops() + */ + ret = kvm_rebind_vcpus(&err); + if (ret < 0) { + return ret; + } + assert(!err); + /* these can be 
called only after ram_block_rebind() */ memory_listener_register(&kml->listener, &address_space_memory); memory_listener_register(&kvm_io_listener, &address_space_io); -- 2.42.0
