Some ARM CPUs advertise themselves as SMT-capable by having the MT[24] bit set to 1 in the MPIDR register. These CPUs have the thread id in Aff0[7:0] bits, the CPU id in Aff1[15:8] bits and the cluster id in Aff2[23:16] bits in MPIDR.
On the other hand, ARM CPUs without SMT have the MT[24] bit set to 0, the CPU id in Aff0[7:0] bits and the cluster id in Aff1[15:8] bits in MPIDR. The mpidr_read_val() function always reported a non-SMT (i.e., MT=0 style) MPIDR value, which means it was wrong for the following SMT-capable CPUs supported by QEMU: - cortex-a55 - cortex-a76 - cortex-a710 - neoverse-v1 - neoverse-n1 - neoverse-n2 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1608 Signed-off-by: Dorjoy Chowdhury <dorjoychy...@gmail.com> --- hw/arm/npcm7xx.c | 2 +- hw/arm/sbsa-ref.c | 21 ++++++++++++++++----- hw/arm/virt.c | 18 +++++++++++++++--- target/arm/cpu.c | 14 ++++++++++++-- target/arm/cpu.h | 5 ++++- target/arm/helper.c | 4 ++++ target/arm/tcg/cpu64.c | 12 ++++++++++++ 7 files changed, 64 insertions(+), 12 deletions(-) diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index cc68b5d8f1..9d5dcf1a3f 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -487,7 +487,7 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) /* CPUs */ for (i = 0; i < nc->num_cpus; i++) { object_property_set_int(OBJECT(&s->cpu[i]), "mp-affinity", - arm_build_mp_affinity(i, NPCM7XX_MAX_NUM_CPUS), + arm_build_mp_affinity(ARM_CPU(&s->cpu[i]), i, NPCM7XX_MAX_NUM_CPUS), &error_abort); object_property_set_int(OBJECT(&s->cpu[i]), "reset-cbar", NPCM7XX_GIC_CPU_IF_ADDR, &error_abort); diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index f5709d6c14..dd42788f23 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -147,10 +147,10 @@ static const int sbsa_ref_irqmap[] = { [SBSA_GWDT_WS0] = 16, }; -static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx) +static uint64_t sbsa_ref_cpu_mp_affinity(ARMCPU *cpu, int idx) { uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER; - return arm_build_mp_affinity(idx, clustersz); + return arm_build_mp_affinity(cpu, idx, clustersz); } static void sbsa_fdt_add_gic_node(SBSAMachineState *sms) @@ -254,7 +254,7 @@ static void create_fdt(SBSAMachineState *sms) char *nodename =
g_strdup_printf("/cpus/cpu@%d", cpu); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); CPUState *cs = CPU(armcpu); - uint64_t mpidr = sbsa_ref_cpu_mp_affinity(sms, cpu); + uint64_t mpidr = sbsa_ref_cpu_mp_affinity(armcpu, cpu); qemu_fdt_add_subnode(sms->fdt, nodename); qemu_fdt_setprop_u64(sms->fdt, nodename, "reg", mpidr); @@ -816,8 +816,9 @@ static void sbsa_ref_init(MachineState *machine) static const CPUArchIdList *sbsa_ref_possible_cpu_arch_ids(MachineState *ms) { unsigned int max_cpus = ms->smp.max_cpus; - SBSAMachineState *sms = SBSA_MACHINE(ms); int n; + Object *cpuobj; + ARMCPU *armcpu; if (ms->possible_cpus) { assert(ms->possible_cpus->len == max_cpus); @@ -827,13 +828,23 @@ static const CPUArchIdList *sbsa_ref_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + sizeof(CPUArchId) * max_cpus); ms->possible_cpus->len = max_cpus; + + /* + * Instantiate a temporary CPU object to build mp_affinity + * of the possible CPUs. + */ + cpuobj = object_new(ms->cpu_type); + armcpu = ARM_CPU(cpuobj); + for (n = 0; n < ms->possible_cpus->len; n++) { ms->possible_cpus->cpus[n].type = ms->cpu_type; ms->possible_cpus->cpus[n].arch_id = - sbsa_ref_cpu_mp_affinity(sms, n); + sbsa_ref_cpu_mp_affinity(armcpu, n); ms->possible_cpus->cpus[n].props.has_thread_id = true; ms->possible_cpus->cpus[n].props.thread_id = n; } + + object_unref(cpuobj); return ms->possible_cpus; } diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a9a913aead..fe6d13c08f 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1703,7 +1703,7 @@ void virt_machine_done(Notifier *notifier, void *data) virt_build_smbios(vms); } -static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) +static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, ARMCPU *cpu, int idx) { uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER; VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); @@ -1723,7 +1723,7 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) clustersz 
= GICV3_TARGETLIST_BITS; } } - return arm_build_mp_affinity(idx, clustersz); + return arm_build_mp_affinity(cpu, idx, clustersz); } static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, @@ -2683,6 +2683,8 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) unsigned int max_cpus = ms->smp.max_cpus; VirtMachineState *vms = VIRT_MACHINE(ms); MachineClass *mc = MACHINE_GET_CLASS(vms); + Object *cpuobj; + ARMCPU *armcpu; if (ms->possible_cpus) { assert(ms->possible_cpus->len == max_cpus); @@ -2692,10 +2694,18 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + sizeof(CPUArchId) * max_cpus); ms->possible_cpus->len = max_cpus; + + /* + * Instantiate a temporary CPU object to build mp_affinity + * of the possible CPUs. + */ + cpuobj = object_new(ms->cpu_type); + armcpu = ARM_CPU(cpuobj); + for (n = 0; n < ms->possible_cpus->len; n++) { ms->possible_cpus->cpus[n].type = ms->cpu_type; ms->possible_cpus->cpus[n].arch_id = - virt_cpu_mp_affinity(vms, n); + virt_cpu_mp_affinity(vms, armcpu, n); assert(!mc->smp_props.dies_supported); ms->possible_cpus->cpus[n].props.has_socket_id = true; @@ -2711,6 +2721,8 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[n].props.thread_id = n % ms->smp.threads; } + + object_unref(cpuobj); return ms->possible_cpus; } diff --git a/target/arm/cpu.c b/target/arm/cpu.c index ab8d007a86..34ee98f5f9 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1314,8 +1314,18 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz) +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz) { + if (cpu->has_smt) { + /* + * Right now, the ARM CPUs with SMT supported by QEMU only have + * one thread per core. So Aff0 is always 0. 
+ */ + uint32_t Aff2 = idx / clustersz; + uint32_t Aff1 = idx % clustersz; + uint32_t Aff0 = 0; + return (Aff2 << ARM_AFF2_SHIFT) | (Aff1 << ARM_AFF1_SHIFT) | Aff0; + } uint32_t Aff1 = idx / clustersz; uint32_t Aff0 = idx % clustersz; return (Aff1 << ARM_AFF1_SHIFT) | Aff0; @@ -2136,7 +2146,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * so these bits always RAZ. */ if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) { - cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index, + cpu->mp_affinity = arm_build_mp_affinity(cpu, cs->cpu_index, ARM_DEFAULT_CPUS_PER_CLUSTER); } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index bc0c84873f..57343c7e24 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -948,6 +948,9 @@ struct ArchCPU { /* Uniprocessor system with MP extensions */ bool mp_is_up; + /* Arm cores with SMT support */ + bool has_smt; + /* True if we tried kvm_arm_host_cpu_features() during CPU instance_init * and the probe failed (so we need to report the error in realize) */ @@ -1140,7 +1143,7 @@ void arm_cpu_post_init(Object *obj); (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK) #define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK) -uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz); +uint64_t arm_build_mp_affinity(ARMCPU *cpu, int idx, uint8_t clustersz); #ifndef CONFIG_USER_ONLY extern const VMStateDescription vmstate_arm_cpu; diff --git a/target/arm/helper.c b/target/arm/helper.c index a620481d7c..3e09bc950b 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4676,6 +4676,10 @@ static uint64_t mpidr_read_val(CPUARMState *env) mpidr |= (1u << 30); } } + + if (cpu->has_smt) { + mpidr |= (1U << 24); + } return mpidr; } diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 9f7a9f3d2c..8807809842 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -289,6 +289,8 @@ static void aarch64_a55_initfn(Object *obj) /* From D5.4 AArch64 PMU register summary */ cpu->isar.reset_pmcr_el0 
= 0x410b3000; + + cpu->has_smt = true; } static void aarch64_a72_initfn(Object *obj) @@ -413,6 +415,8 @@ static void aarch64_a76_initfn(Object *obj) /* From D5.1 AArch64 PMU register summary */ cpu->isar.reset_pmcr_el0 = 0x410b3000; + + cpu->has_smt = true; } static void aarch64_a64fx_initfn(Object *obj) @@ -652,6 +656,8 @@ static void aarch64_neoverse_n1_initfn(Object *obj) /* From D5.1 AArch64 PMU register summary */ cpu->isar.reset_pmcr_el0 = 0x410c3000; + cpu->has_smt = true; + define_neoverse_n1_cp_reginfo(cpu); } @@ -740,6 +746,8 @@ static void aarch64_neoverse_v1_initfn(Object *obj) /* From 5.5.1 AArch64 PMU register summary */ cpu->isar.reset_pmcr_el0 = 0x41213000; + cpu->has_smt = true; + define_neoverse_v1_cp_reginfo(cpu); aarch64_add_pauth_properties(obj); @@ -958,6 +966,8 @@ static void aarch64_a710_initfn(Object *obj) /* FIXME: Not documented -- copied from neoverse-v1 */ cpu->reset_sctlr = 0x30c50838; + cpu->has_smt = true; + define_arm_cp_regs(cpu, cortex_a710_cp_reginfo); aarch64_add_pauth_properties(obj); @@ -1055,6 +1065,8 @@ static void aarch64_neoverse_n2_initfn(Object *obj) /* FIXME: Not documented -- copied from neoverse-v1 */ cpu->reset_sctlr = 0x30c50838; + cpu->has_smt = true; + /* * The Neoverse N2 has all of the Cortex-A710 IMPDEF registers, * and a few more RNG related ones. -- 2.39.2