Hi Babu, On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: > Date: Fri, 4 Aug 2023 10:48:29 -0500 > From: "Moger, Babu" <babu.mo...@amd.com> > Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > CPUID[4] > > Hi Zhao, > > On 8/4/23 04:48, Zhao Liu wrote: > > Hi Babu, > > > > On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: > >> Date: Thu, 3 Aug 2023 11:41:40 -0500 > >> From: "Moger, Babu" <babu.mo...@amd.com> > >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >> CPUID[4] > >> > >> Hi Zhao, > >> > >> On 8/2/23 18:49, Moger, Babu wrote: > >>> Hi Zhao, > >>> > >>> Hitting this error after this patch. > >>> > >>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > >>> not be reached > >>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > >>> should not be reached > >>> Aborted (core dumped) > >>> > >>> Looks like share_level for all the caches for AMD is not initialized. > > > > I missed these change when I rebase. Sorry for that. > > > > BTW, could I ask a question? From a previous discussion[1], I understand > > that the cache info is used to show the correct cache information in > > new machine. And from [2], the wrong cache info may cause "compatibility > > issues". > > > > Is this "compatibility issues" AMD specific? I'm not sure if Intel should > > update the cache info like that. thanks! > > I was going to comment about that. Good that you asked me. > > Compatibility is qemu requirement. Otherwise the migrations will fail. > > Any changes in the topology is going to cause migration problems.
Could you please explain the details of this "migration problem" a bit more? I don't understand why it would cause a problem, and I'm not sure whether I'm missing any cases. Thanks, Zhao > > I am not sure how you are going to handle this. You can probably look at > the feature "x-intel-pt-auto-level". > > make sure to test the migration. > > Thanks > Babu > > > > > > [1]: > > https://patchwork.kernel.org/project/kvm/patch/cy4pr12mb1768a3cbe42aafb03cb1081e95...@cy4pr12mb1768.namprd12.prod.outlook.com/ > > [2]: > > https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.mo...@amd.com/ > > > >> > >> The following patch fixes the problem. > >> > >> ====================================================== > >> > >> > >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c > >> index f4c48e19fa..976a2755d8 100644 > >> --- a/target/i386/cpu.c > >> +++ b/target/i386/cpu.c > >> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { > >> .size = 2 * MiB, > >> .line_size = 64, > >> .associativity = 8, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > > > > This "legacy_l2_cache_cpuid2" is not used to encode cache topology. > > I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW. 
> > > >> }; > >> > >> > >> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > 
>> }, > >> }; > >> > >> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .partitions = 1, > >> .sets = 2048, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + 
.share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> > >> ========================================================================= > > > > > > Look good to me except legacy_l2_cache_cpuid2, thanks very much! > > I'll add this in next version. > > > > -Zhao > > > >> > >> Thanks > >> Babu > >>> > >>> On 8/1/23 05:35, Zhao Liu wrote: > >>>> From: Zhao Liu <zhao1....@intel.com> > >>>> > >>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for > >>>> intel CPUs. > >>>> > >>>> After cache models have topology information, we can use > >>>> CPUCacheInfo.share_level to decide which topology level to be encoded > >>>> into CPUID[4].EAX[bits 25:14]. > >>>> > >>>> And since maximum_processor_id (original "num_apic_ids") is parsed > >>>> based on cpu topology levels, which are verified when parsing smp, it's > >>>> no need to check this value by "assert(num_apic_ids > 0)" again, so > >>>> remove this assert. > >>>> > >>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a > >>>> helper to make the code cleaner. > >>>> > >>>> Signed-off-by: Zhao Liu <zhao1....@intel.com> > >>>> --- > >>>> Changes since v1: > >>>> * Use "enum CPUTopoLevel share_level" as the parameter in > >>>> max_processor_ids_for_cache(). > >>>> * Make cache_into_passthrough case also use > >>>> max_processor_ids_for_cache() and max_core_ids_in_package() to > >>>> encode CPUID[4]. (Yanan) > >>>> * Rename the title of this patch (the original is "i386: Use > >>>> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). > >>>> --- > >>>> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ > >>>> 1 file changed, 43 insertions(+), 27 deletions(-) > >>>> > >>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c > >>>> index 55aba4889628..c9897c0fe91a 100644 > >>>> --- a/target/i386/cpu.c > >>>> +++ b/target/i386/cpu.c > >>>> @@ -234,22 +234,53 @@ static uint8_t > >>>> cpuid2_cache_descriptor(CPUCacheInfo *cache) > >>>> ((t) == UNIFIED_CACHE) ? 
CACHE_TYPE_UNIFIED : \ > >>>> 0 /* Invalid value */) > >>>> > >>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, > >>>> + enum CPUTopoLevel > >>>> share_level) > >>>> +{ > >>>> + uint32_t num_ids = 0; > >>>> + > >>>> + switch (share_level) { > >>>> + case CPU_TOPO_LEVEL_CORE: > >>>> + num_ids = 1 << apicid_core_offset(topo_info); > >>>> + break; > >>>> + case CPU_TOPO_LEVEL_DIE: > >>>> + num_ids = 1 << apicid_die_offset(topo_info); > >>>> + break; > >>>> + case CPU_TOPO_LEVEL_PACKAGE: > >>>> + num_ids = 1 << apicid_pkg_offset(topo_info); > >>>> + break; > >>>> + default: > >>>> + /* > >>>> + * Currently there is no use case for SMT and MODULE, so use > >>>> + * assert directly to facilitate debugging. > >>>> + */ > >>>> + g_assert_not_reached(); > >>>> + } > >>>> + > >>>> + return num_ids - 1; > >>>> +} > >>>> + > >>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > >>>> +{ > >>>> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > >>>> + apicid_core_offset(topo_info)); > >>>> + return num_cores - 1; > >>>> +} > >>>> > >>>> /* Encode cache info for CPUID[4] */ > >>>> static void encode_cache_cpuid4(CPUCacheInfo *cache, > >>>> - int num_apic_ids, int num_cores, > >>>> + X86CPUTopoInfo *topo_info, > >>>> uint32_t *eax, uint32_t *ebx, > >>>> uint32_t *ecx, uint32_t *edx) > >>>> { > >>>> assert(cache->size == cache->line_size * cache->associativity * > >>>> cache->partitions * cache->sets); > >>>> > >>>> - assert(num_apic_ids > 0); > >>>> *eax = CACHE_TYPE(cache->type) | > >>>> CACHE_LEVEL(cache->level) | > >>>> (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | > >>>> - ((num_cores - 1) << 26) | > >>>> - ((num_apic_ids - 1) << 14); > >>>> + (max_core_ids_in_package(topo_info) << 26) | > >>>> + (max_processor_ids_for_cache(topo_info, cache->share_level) > >>>> << 14); > >>>> > >>>> assert(cache->line_size > 0); > >>>> assert(cache->partitions > 0); > >>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t > >>>> index, uint32_t count, > >>>> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> > >>>> 14); > >>>> > >>>> if (cores_per_pkg > 1) { > >>>> - int addressable_cores_offset = > >>>> - > >>>> apicid_pkg_offset(&topo_info) - > >>>> - > >>>> apicid_core_offset(&topo_info); > >>>> - > >>>> *eax &= ~0xFC000000; > >>>> - *eax |= (1 << addressable_cores_offset - 1) << 26; > >>>> + *eax |= max_core_ids_in_package(&topo_info) << 26; > >>>> } > >>>> if (host_vcpus_per_cache > cpus_per_pkg) { > >>>> - int pkg_offset = apicid_pkg_offset(&topo_info); > >>>> - > >>>> *eax &= ~0x3FFC000; > >>>> - *eax |= (1 << pkg_offset - 1) << 14; > >>>> + *eax |= > >>>> + max_processor_ids_for_cache(&topo_info, > >>>> + CPU_TOPO_LEVEL_PACKAGE) > >>>> << 14; > >>>> } > >>>> } > >>>> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { > >>>> *eax = *ebx = *ecx = *edx = 0; > >>>> } else { > >>>> *eax = 0; > >>>> - int addressable_cores_offset = > >>>> apicid_pkg_offset(&topo_info) - > >>>> - > >>>> apicid_core_offset(&topo_info); > >>>> - int core_offset, die_offset; > >>>> > >>>> switch (count) { > >>>> case 0: /* L1 dcache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, > >>>> - (1 << core_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 1: /* L1 icache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, > >>>> - (1 << core_offset), > >>>> - (1 << 
addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 2: /* L2 cache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, > >>>> - (1 << core_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 3: /* L3 cache info */ > >>>> - die_offset = apicid_die_offset(&topo_info); > >>>> if (cpu->enable_l3_cache) { > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, > >>>> - (1 << die_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> } > >>> > >> > >> -- > >> Thanks > >> Babu Moger > > -- > Thanks > Babu Moger