> On 12 Dec 2025, at 8:33 PM, Ani Sinha <[email protected]> wrote:
>
> This change adds common kvm specific support to handle KVM VM file descriptor
> change. KVM VM file descriptor can change as a part of confidential guest
> reset
> mechanism. A new function api kvm_arch_vmfd_change_ops() per
> architecture platform is added in order to implement architecture specific
> changes required to support it. A subsequent patch will add x86 specific
> implementation for kvm_arch_vmfd_change_ops as currently only x86 supports
> confidential guest reset.
Some more fixes to this patch are in order; I will include them in the next
respin.
<snip>
>
> +static int kvm_reset_vmfd(MachineState *ms)
> +{
> + KVMState *s;
> + KVMMemoryListener *kml;
> + int ret, type;
> + Error *err = NULL;
> +
> + s = KVM_STATE(ms->accelerator);
> + kml = &s->memory_listener;
> +
> + memory_listener_unregister(&kml->listener);
> + memory_listener_unregister(&kvm_io_listener);
> +
> + if (s->vmfd >= 0) {
> + close(s->vmfd);
> + }
> +
> + type = find_kvm_machine_type(ms);
> + if (type < 0) {
> + return -EINVAL;
> + }
> +
> + ret = do_kvm_create_vm(s, type);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + s->vmfd = ret;
> +
> + kvm_setup_dirty_ring(s);
> +
> + /* rebind memory to new vm fd */
> + ret = ram_block_rebind(&err);
> + if (ret < 0) {
> + return ret;
> + }
> + assert(!err);
> +
> + ret = kvm_arch_vmfd_change_ops(ms, s);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + if (s->kernel_irqchip_allowed) {
> + do_kvm_irqchip_create(s);
> + }
> +
> + /* these can be only called after ram_block_rebind() */
> + memory_listener_register(&kml->listener, &address_space_memory);
> + memory_listener_register(&kvm_io_listener, &address_space_io);
> +
> + /*
> + * kvm fd has changed. Commit the irq routes to KVM once more.
> + */
> + kvm_irqchip_commit_routes(s);
> +
> + return ret;
> +}
> +
> static int kvm_init(AccelState *as, MachineState *ms)
> {
> MachineClass *mc = MACHINE_GET_CLASS(ms);
> @@ -4014,6 +4077,7 @@ static void kvm_accel_class_init(ObjectClass *oc, const
> void *data)
> AccelClass *ac = ACCEL_CLASS(oc);
> ac->name = "KVM";
> ac->init_machine = kvm_init;
> + ac->reset_vmfd = kvm_reset_vmfd;
> ac->has_memory = kvm_accel_has_memory;
> ac->allowed = &kvm_allowed;
> ac->gdbstub_supported_sstep_flags = kvm_gdbstub_sstep_flags;
> diff --git a/include/system/kvm.h b/include/system/kvm.h
> index 8f9eecf044..ade13dd8cc 100644
> --- a/include/system/kvm.h
> +++ b/include/system/kvm.h
> @@ -358,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s);
> int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
> int kvm_arch_init_vcpu(CPUState *cpu);
> int kvm_arch_destroy_vcpu(CPUState *cpu);
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s);
Another API should be added here:
bool kvm_arch_supports_vmfd_change(void)
It should tell kvm_reset_vmfd() whether the underlying architecture supports
all the operations that need to be redone after a VM file descriptor change.
If it does not, kvm_reset_vmfd() should bail out. Something like:
/*
* bail if the current architecture does not support VM file
* descriptor change.
*/
if (!kvm_arch_supports_vmfd_change()) {
error_report("This target architecture does not support KVM VM "
"file descriptor change.");
return -EOPNOTSUPP;
}
>
> #ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU
> void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd);
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 0d57081e69..919bf95ae1 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -1568,6 +1568,11 @@ void kvm_arch_init_irq_routing(KVMState *s)
> {
> }
>
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> + abort();
> +}
> +
> int kvm_arch_irqchip_create(KVMState *s)
> {
> if (kvm_kernel_irqchip_split()) {
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 02819de625..cdfcb70f40 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -3252,6 +3252,11 @@ static int kvm_vm_enable_energy_msrs(KVMState *s)
> return 0;
> }
>
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> + abort();
> +}
> +
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
> int ret;
> diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
> index 26e40c9bdc..4171781346 100644
> --- a/target/loongarch/kvm/kvm.c
> +++ b/target/loongarch/kvm/kvm.c
> @@ -1312,6 +1312,11 @@ int kvm_arch_irqchip_create(KVMState *s)
> return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
> }
>
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> + return 0;
This and the other stub implementations should abort() uniformly instead of
silently returning success.
> +}
> +
> void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
> {
> }
> diff --git a/target/mips/kvm.c b/target/mips/kvm.c
> index 912cd5dfa0..28730da06b 100644
> --- a/target/mips/kvm.c
> +++ b/target/mips/kvm.c
> @@ -44,6 +44,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cs)
> return cs->cpu_index;
> }
>
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> + return 0;
> +}
> +
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
> /* MIPS has 128 signals */
> dif