Thomas Huth <th...@redhat.com> writes:

> On 20/08/2025 00.39, Fabiano Rosas wrote:
>> The commit referenced (from QEMU 10.0) has changed the way the pseries
>> machine marks a cpu as quiesced. Previously, the cpu->halted value
>> from QEMU common cpu code was (incorrectly) used. With the fix, the
>> env->quiesced variable starts being used, which improves on the
>> original situation, but also causes a side effect after migration:
>> 
>> The env->quiesced is set at reset and never migrated, which causes the
>> destination QEMU to stop delivering interrupts and hang the machine.
>> 
>> To fix the issue from this point on, start migrating the env->quiesced
>> value.
>> 
>> For QEMU versions < 10.0, sending the new element on the stream would
>> cause migration to be aborted, so add the appropriate compatibility
>> property to omit the new subsection.
>> 
>> Independently of this patch, all migrations from QEMU versions < 10.0
>> will result in a hang since the older QEMU never migrates
>> env->quiesced. This is bad because it leaves machines already running
>> on the old QEMU without a migration path into newer versions.
>> 
>> As a workaround, clear env->quiesced in the new QEMU whenever
>> cpu->halted is also clear. This assumes rtas_stop_self() always sets
>> both flags at the same time. Migrations during secondaries bringup
>> (i.e. before rtas-start-cpu) will still cause a hang, but those are
>> early enough that requiring reboot would not be unreasonable.
>> 
>> Note that this was tested with -cpu power9 and -machine ic-mode=xive
>> due to another bug affecting migration of XICS guests. Tested both
>> forward and backward migration and savevm/loadvm from 9.2 and 10.0.
>> 
>> Reported-by: Fabian Vogt <fv...@suse.de>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3079
>> Fixes: fb802acdc8b ("ppc/spapr: Fix RTAS stopped state")
>> Signed-off-by: Fabiano Rosas <faro...@suse.de>
>> ---
>> The choice of PowerPCCPU to hold the compat property is dubious. This
>> only affects pseries, but it seems like a layering violation to access
>> SpaprMachine from target/ppc/, suggestions welcome.
>> ---
>>   hw/core/machine.c     |  1 +
>>   target/ppc/cpu.h      |  1 +
>>   target/ppc/cpu_init.c |  7 +++++++
>>   target/ppc/machine.c  | 40 ++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 49 insertions(+)
>> 
>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> index bd47527479..ea83c0876b 100644
>> --- a/hw/core/machine.c
>> +++ b/hw/core/machine.c
>> @@ -42,6 +42,7 @@ GlobalProperty hw_compat_10_0[] = {
>>       { "vfio-pci", "x-migration-load-config-after-iter", "off" },
>>       { "ramfb", "use-legacy-x86-rom", "true"},
>>       { "vfio-pci-nohotplug", "use-legacy-x86-rom", "true" },
>> +    { "powerpc64-cpu", "rtas-stopped-state", "false" },
>
> This is specific to ppc, so it should not go into the generic hw_compat_* 
> array.
>

So arm-cpu in hw_compat_9_2 should not be there? 

> Please define a spapr_compat_10_0 array in 
> spapr_machine_10_0_class_options() and do another compat_props_add() for 
> that array there. (Similar to what is done for TYPE_SPAPR_PCI_HOST_BRIDGE in
> spapr_machine_5_0_class_options() for example)
>

Ok, thanks for the pointer.

>   Thanks,
>    Thomas
>
>
>>   };
>>   const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0);
>>   
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 6b90543811..8ff453024b 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -1470,6 +1470,7 @@ struct ArchCPU {
>>       void *machine_data;
>>       int32_t node_id; /* NUMA node this CPU belongs to */
>>       PPCHash64Options *hash64_opts;
>> +    bool rtas_stopped_state;
>>   
>>       /* Those resources are used only during code translation */
>>       /* opcode handlers */
>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
>> index a0e77f2673..4380c6eb14 100644
>> --- a/target/ppc/cpu_init.c
>> +++ b/target/ppc/cpu_init.c
>> @@ -55,6 +55,11 @@
>>   /* #define PPC_DEBUG_SPR */
>>   /* #define USE_APPLE_GDB */
>>   
>> +static const Property powerpc_cpu_properties[] = {
>> +    DEFINE_PROP_BOOL("rtas-stopped-state", PowerPCCPU,
>> +                      rtas_stopped_state, true),
>> +};
>> +
>>   static inline void vscr_init(CPUPPCState *env, uint32_t val)
>>   {
>>       /* Altivec always uses round-to-nearest */
>> @@ -7525,6 +7530,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, const 
>> void *data)
>>                                         &pcc->parent_unrealize);
>>       pcc->pvr_match = ppc_pvr_match_default;
>>   
>> +    device_class_set_props(dc, powerpc_cpu_properties);
>> +
>>       resettable_class_set_parent_phases(rc, NULL, ppc_cpu_reset_hold, NULL,
>>                                          &pcc->parent_phases);
>>   
>> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
>> index d72e5ecb94..8797233ebe 100644
>> --- a/target/ppc/machine.c
>> +++ b/target/ppc/machine.c
>> @@ -257,6 +257,23 @@ static int cpu_post_load(void *opaque, int version_id)
>>           ppc_store_sdr1(env, env->spr[SPR_SDR1]);
>>       }
>>   
>> +    if (!cpu->rtas_stopped_state) {
>> +        /*
>> +         * The source QEMU doesn't have fb802acdc8 and still uses halt
>> +         * + PM bits in LPCR to implement RTAS stopped state. The new
>> +         * QEMU will have put the newly created vcpus in that state,
>> +         * waiting for the start-cpu RTAS call. Clear the quiesced
>> +         * flag if possible, otherwise the newly-loaded machine will
>> +         * hang indefinitely due to quiesced state ignoring
>> +         * interrupts.
>> +         */
>> +
>> +        if (!CPU(cpu)->halted) {
>> +            /* not halted, so definitely not in RTAS stopped state */
>> +            env->quiesced = 0;
>> +        }
>> +    }
>> +
>>       post_load_update_msr(env);
>>   
>>       if (tcg_enabled()) {
>> @@ -649,6 +666,28 @@ static const VMStateDescription vmstate_reservation = {
>>       }
>>   };
>>   
>> +static bool rtas_stopped_needed(void *opaque)
>> +{
>> +    PowerPCCPU *cpu = opaque;
>> +
>> +    return cpu->rtas_stopped_state && !cpu->env.quiesced;
>> +}
>> +
>> +static const VMStateDescription vmstate_rtas_stopped = {
>> +    .name = "cpu/rtas_stopped",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .needed = rtas_stopped_needed,
>> +    .fields = (const VMStateField[]) {
>> +        /*
>> +         * "RTAS stopped" state, independent of halted state. For QEMU
>> +         * < 10.0, this is taken from cpu->halted at cpu_post_load()
>> +         */
>> +        VMSTATE_BOOL(env.quiesced, PowerPCCPU),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>   #ifdef TARGET_PPC64
>>   static bool bhrb_needed(void *opaque)
>>   {
>> @@ -715,6 +754,7 @@ const VMStateDescription vmstate_ppc_cpu = {
>>           &vmstate_tlbmas,
>>           &vmstate_compat,
>>           &vmstate_reservation,
>> +        &vmstate_rtas_stopped,
>>           NULL
>>       }
>>   };

Reply via email to