> On 12 Dec 2025, at 8:33 PM, Ani Sinha <[email protected]> wrote:
> 
> This change adds common kvm specific support to handle KVM VM file descriptor
> change. KVM VM file descriptor can change as a part of confidential guest 
> reset
> mechanism. A new function api kvm_arch_vmfd_change_ops() per
> architecture platform is added in order to implement architecture specific
> changes required to support it. A subsequent patch will add x86 specific
> implementation for kvm_arch_vmfd_change_ops as currently only x86 supports
> confidential guest reset.

Some more fixes to this patch are in order; I will include them in the next
spin.

<snip>

> 
> +static int kvm_reset_vmfd(MachineState *ms)
> +{
> +    KVMState *s;
> +    KVMMemoryListener *kml;
> +    int ret, type;
> +    Error *err = NULL;
> +
> +    s = KVM_STATE(ms->accelerator);
> +    kml = &s->memory_listener;
> +
> +    memory_listener_unregister(&kml->listener);
> +    memory_listener_unregister(&kvm_io_listener);
> +
> +    if (s->vmfd >= 0) {
> +        close(s->vmfd);
> +    }
> +
> +    type = find_kvm_machine_type(ms);
> +    if (type < 0) {
> +        return -EINVAL;
> +    }
> +
> +    ret = do_kvm_create_vm(s, type);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    s->vmfd = ret;
> +
> +    kvm_setup_dirty_ring(s);
> +
> +    /* rebind memory to new vm fd */
> +    ret = ram_block_rebind(&err);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    assert(!err);
> +
> +    ret = kvm_arch_vmfd_change_ops(ms, s);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    if (s->kernel_irqchip_allowed) {
> +        do_kvm_irqchip_create(s);
> +    }
> +
> +    /* these can be only called after ram_block_rebind() */
> +    memory_listener_register(&kml->listener, &address_space_memory);
> +    memory_listener_register(&kvm_io_listener, &address_space_io);
> +
> +    /*
> +     * kvm fd has changed. Commit the irq routes to KVM once more.
> +     */
> +    kvm_irqchip_commit_routes(s);
> +
> +    return ret;
> +}
> +
> static int kvm_init(AccelState *as, MachineState *ms)
> {
>     MachineClass *mc = MACHINE_GET_CLASS(ms);
> @@ -4014,6 +4077,7 @@ static void kvm_accel_class_init(ObjectClass *oc, const 
> void *data)
>     AccelClass *ac = ACCEL_CLASS(oc);
>     ac->name = "KVM";
>     ac->init_machine = kvm_init;
> +    ac->reset_vmfd = kvm_reset_vmfd;
>     ac->has_memory = kvm_accel_has_memory;
>     ac->allowed = &kvm_allowed;
>     ac->gdbstub_supported_sstep_flags = kvm_gdbstub_sstep_flags;
> diff --git a/include/system/kvm.h b/include/system/kvm.h
> index 8f9eecf044..ade13dd8cc 100644
> --- a/include/system/kvm.h
> +++ b/include/system/kvm.h
> @@ -358,6 +358,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s);
> int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
> int kvm_arch_init_vcpu(CPUState *cpu);
> int kvm_arch_destroy_vcpu(CPUState *cpu);
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s);

Another API should be added here:
bool kvm_arch_supports_vmfd_change(void)

It should tell kvm_reset_vmfd() whether the underlying architecture supports
all the operations that need to be redone after a VM file descriptor change.
If it does not, kvm_reset_vmfd() should bail out early. Something like:

    /*
     * bail if the current architecture does not support VM file
     * descriptor change.
     */
    if (!kvm_arch_supports_vmfd_change()) {
        error_report("This target architecture does not support KVM VM "
                     "file descriptor change.");
        return -EOPNOTSUPP;
    }

> 
> #ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU
> void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd);
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 0d57081e69..919bf95ae1 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -1568,6 +1568,11 @@ void kvm_arch_init_irq_routing(KVMState *s)
> {
> }
> 
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> +    abort();
> +}
> +
> int kvm_arch_irqchip_create(KVMState *s)
> {
>     if (kvm_kernel_irqchip_split()) {
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 02819de625..cdfcb70f40 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -3252,6 +3252,11 @@ static int kvm_vm_enable_energy_msrs(KVMState *s)
>     return 0;
> }
> 
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> +    abort();
> +}
> +
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
>     int ret;
> diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
> index 26e40c9bdc..4171781346 100644
> --- a/target/loongarch/kvm/kvm.c
> +++ b/target/loongarch/kvm/kvm.c
> @@ -1312,6 +1312,11 @@ int kvm_arch_irqchip_create(KVMState *s)
>     return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
> }
> 
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> +    return 0;

This and the other per-architecture stubs should uniformly abort() instead of silently returning success.

> +}
> +
> void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
> {
> }
> diff --git a/target/mips/kvm.c b/target/mips/kvm.c
> index 912cd5dfa0..28730da06b 100644
> --- a/target/mips/kvm.c
> +++ b/target/mips/kvm.c
> @@ -44,6 +44,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cs)
>     return cs->cpu_index;
> }
> 
> +int kvm_arch_vmfd_change_ops(MachineState *ms, KVMState *s)
> +{
> +    return 0;
> +}
> +
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
>     /* MIPS has 128 signals */
> dif


Reply via email to