Re: [PATCH RESEND 05/18] i386/cpu: Consolidate the use of topo_info in cpu_x86_cpuid()

2023-02-14 Thread wangyanan (Y)

在 2023/2/15 15:10, Zhao Liu 写道:

On Wed, Feb 15, 2023 at 11:28:25AM +0800, wangyanan (Y) wrote:

Date: Wed, 15 Feb 2023 11:28:25 +0800
From: "wangyanan (Y)" 
Subject: Re: [PATCH RESEND 05/18] i386/cpu: Consolidate the use of
  topo_info in cpu_x86_cpuid()

在 2023/2/13 17:36, Zhao Liu 写道:

From: Zhao Liu 

In cpu_x86_cpuid(), there are many variables representing the cpu
topology, e.g., topo_info, cs->nr_cores/cs->nr_threads.

Since the names of cs->nr_cores/cs->nr_threads do not accurately
represent their meaning, the use of cs->nr_cores/cs->nr_threads is prone
to confusion and mistakes.

And the structure X86CPUTopoInfo names its members clearly, thus the
variable "topo_info" should be preferred.

Suggested-by: Robert Hoo 
Signed-off-by: Zhao Liu 
---
   target/i386/cpu.c | 30 ++
   1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7833505092d8..4cda84eb96f1 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5215,11 +5215,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   uint32_t limit;
   uint32_t signature[3];
   X86CPUTopoInfo topo_info;
+uint32_t cpus_per_pkg;
   topo_info.dies_per_pkg = env->nr_dies;
   topo_info.cores_per_die = cs->nr_cores / env->nr_dies;
   topo_info.threads_per_core = cs->nr_threads;
+cpus_per_pkg = topo_info.dies_per_pkg * topo_info.cores_per_die *
+   topo_info.threads_per_core;
+
   /* Calculate & apply limits for different index ranges */
   if (index >= 0xC000) {
   limit = env->cpuid_xlevel2;
@@ -5255,8 +5259,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   *ecx |= CPUID_EXT_OSXSAVE;
   }
   *edx = env->features[FEAT_1_EDX];
-if (cs->nr_cores * cs->nr_threads > 1) {
-*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
+if (cpus_per_pkg > 1) {
+*ebx |= cpus_per_pkg << 16;
   *edx |= CPUID_HT;
   }
   if (!cpu->enable_pmu) {
@@ -5293,10 +5297,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
*/
   if (*eax & 31) {
   int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
-int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
-if (cs->nr_cores > 1) {
+int vcpus_per_socket = cpus_per_pkg;

Would it make sense to directly use cpus_per_pkg here?

+int cores_per_socket = topo_info.cores_per_die *
+   topo_info.dies_per_pkg;

There are other places in cpu_x86_cpuid where cs->nr_cores is used
separately, why not make a global "cores_per_pkg" like cpus_per_pkg
and also tweak the other places?

Yeah, good idea.


+if (cores_per_socket > 1) {
   *eax &= ~0xFC000000;
-*eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
+*eax |= (pow2ceil(cores_per_socket) - 1) << 26;
   }
   if (host_vcpus_per_cache > vcpus_per_socket) {
   *eax &= ~0x3FFC000;
@@ -5436,12 +5442,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   switch (count) {
   case 0:
   *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;

There are many other places in cpu_x86_cpuid where cs->nr_threads
is used separately, such as encode_cache_cpuid4(***), should we
replace them all?

In a previous patch [1], I replaced the use of cs->nr_threads/nr_cores in
the call of encode_cache_cpuid4().

The cleanest way is to pass topo_info to encode_cache_cpuid4(), but this
involves the modification of the interface format and the use of the
cache topo level, so I included it in a follow-up patch [2].

Ok, I see. I have not reached there.


[1]: [PATCH RESEND 04/18] i386/cpu: Fix number of addressable IDs in
  CPUID.04,
  https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03188.html
[2]: [PATCH RESEND 15/18] i386: Use CPUCacheInfo.share_level to encode
  CPUID[4].EAX[bits 25:14],
  https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03199.html


   *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
   break;
   case 1:
   *eax = apicid_pkg_offset(&topo_info);
-*ebx = cs->nr_cores * cs->nr_threads;
+*ebx = cpus_per_pkg;
   *ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
   break;
   default:
@@ -5472,7 +5478,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   switch (count) {
   case 0:
   *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;
   *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
   break;
  

Re: [PATCH RESEND 05/18] i386/cpu: Consolidate the use of topo_info in cpu_x86_cpuid()

2023-02-14 Thread Zhao Liu
On Wed, Feb 15, 2023 at 11:28:25AM +0800, wangyanan (Y) wrote:
> Date: Wed, 15 Feb 2023 11:28:25 +0800
> From: "wangyanan (Y)" 
> Subject: Re: [PATCH RESEND 05/18] i386/cpu: Consolidate the use of
>  topo_info in cpu_x86_cpuid()
> 
> 在 2023/2/13 17:36, Zhao Liu 写道:
> > From: Zhao Liu 
> > 
> > In cpu_x86_cpuid(), there are many variables representing the cpu
> > topology, e.g., topo_info, cs->nr_cores/cs->nr_threads.
> > 
> > Since the names of cs->nr_cores/cs->nr_threads do not accurately
> > represent their meaning, the use of cs->nr_cores/cs->nr_threads is prone
> > to confusion and mistakes.
> > 
> > And the structure X86CPUTopoInfo names its members clearly, thus the
> > variable "topo_info" should be preferred.
> > 
> > Suggested-by: Robert Hoo 
> > Signed-off-by: Zhao Liu 
> > ---
> >   target/i386/cpu.c | 30 ++
> >   1 file changed, 18 insertions(+), 12 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 7833505092d8..4cda84eb96f1 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -5215,11 +5215,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
> > index, uint32_t count,
> >   uint32_t limit;
> >   uint32_t signature[3];
> >   X86CPUTopoInfo topo_info;
> > +uint32_t cpus_per_pkg;
> >   topo_info.dies_per_pkg = env->nr_dies;
> >   topo_info.cores_per_die = cs->nr_cores / env->nr_dies;
> >   topo_info.threads_per_core = cs->nr_threads;
> > +cpus_per_pkg = topo_info.dies_per_pkg * topo_info.cores_per_die *
> > +   topo_info.threads_per_core;
> > +
> >   /* Calculate & apply limits for different index ranges */
> >   if (index >= 0xC000) {
> >   limit = env->cpuid_xlevel2;
> > @@ -5255,8 +5259,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
> > uint32_t count,
> >   *ecx |= CPUID_EXT_OSXSAVE;
> >   }
> >   *edx = env->features[FEAT_1_EDX];
> > -if (cs->nr_cores * cs->nr_threads > 1) {
> > -*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
> > +if (cpus_per_pkg > 1) {
> > +*ebx |= cpus_per_pkg << 16;
> >   *edx |= CPUID_HT;
> >   }
> >   if (!cpu->enable_pmu) {
> > @@ -5293,10 +5297,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
> > index, uint32_t count,
> >*/
> >   if (*eax & 31) {
> >   int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
> > -int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
> > -if (cs->nr_cores > 1) {
> > +int vcpus_per_socket = cpus_per_pkg;
> Would it make sense to directly use cpus_per_pkg here?
> > +int cores_per_socket = topo_info.cores_per_die *
> > +   topo_info.dies_per_pkg;
> There are other places in cpu_x86_cpuid where cs->nr_cores is used
> separately, why not make a global "cores_per_pkg" like cpus_per_pkg
> and also tweak the other places?

Yeah, good idea.

> > +if (cores_per_socket > 1) {
> >   *eax &= ~0xFC000000;
> > -*eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
> > +*eax |= (pow2ceil(cores_per_socket) - 1) << 26;
> >   }
> >   if (host_vcpus_per_cache > vcpus_per_socket) {
> >   *eax &= ~0x3FFC000;
> > @@ -5436,12 +5442,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
> > index, uint32_t count,
> >   switch (count) {
> >   case 0:
> >   *eax = apicid_core_offset(&topo_info);
> > -*ebx = cs->nr_threads;
> > +*ebx = topo_info.threads_per_core;
> There are many other places in cpu_x86_cpuid where cs->nr_threads
> is used separately, such as encode_cache_cpuid4(***), should we
> replace them all?

In a previous patch [1], I replaced the use of cs->nr_threads/nr_cores in
the call of encode_cache_cpuid4().

The cleanest way is to pass topo_info to encode_cache_cpuid4(), but this
involves the modification of the interface format and the use of the
cache topo level, so I included it in a follow-up patch [2].

[1]: [PATCH RESEND 04/18] i386/cpu: Fix number of addressable IDs in
 CPUID.04,
 https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03188.html
[2]: [PATCH RESEND 15/18] i386: Use CPUCacheInfo.share_level to encode
 CPUID[4].EAX[bits 25:14],
 https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03199.html

> >   *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
> >   break;
> >   case 1:
> >   *eax = apicid_pkg_offset(&topo_info);
> > -*ebx = cs->nr_cores * cs->nr_threads;
> > +*ebx = cpus_per_pkg;
> >   *ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
> >   break;
> >   default:
> > @@ -5472,7 +5478,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
> > uint32_t count,
> >   

Re: [PATCH RESEND 05/18] i386/cpu: Consolidate the use of topo_info in cpu_x86_cpuid()

2023-02-14 Thread wangyanan (Y)

在 2023/2/13 17:36, Zhao Liu 写道:

From: Zhao Liu 

In cpu_x86_cpuid(), there are many variables representing the cpu
topology, e.g., topo_info, cs->nr_cores/cs->nr_threads.

Since the names of cs->nr_cores/cs->nr_threads do not accurately
represent their meaning, the use of cs->nr_cores/cs->nr_threads is prone
to confusion and mistakes.

And the structure X86CPUTopoInfo names its members clearly, thus the
variable "topo_info" should be preferred.

Suggested-by: Robert Hoo 
Signed-off-by: Zhao Liu 
---
  target/i386/cpu.c | 30 ++
  1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7833505092d8..4cda84eb96f1 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5215,11 +5215,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  uint32_t limit;
  uint32_t signature[3];
  X86CPUTopoInfo topo_info;
+uint32_t cpus_per_pkg;
  
  topo_info.dies_per_pkg = env->nr_dies;

  topo_info.cores_per_die = cs->nr_cores / env->nr_dies;
  topo_info.threads_per_core = cs->nr_threads;
  
+cpus_per_pkg = topo_info.dies_per_pkg * topo_info.cores_per_die *

+   topo_info.threads_per_core;
+
  /* Calculate & apply limits for different index ranges */
  if (index >= 0xC000) {
  limit = env->cpuid_xlevel2;
@@ -5255,8 +5259,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  *ecx |= CPUID_EXT_OSXSAVE;
  }
  *edx = env->features[FEAT_1_EDX];
-if (cs->nr_cores * cs->nr_threads > 1) {
-*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
+if (cpus_per_pkg > 1) {
+*ebx |= cpus_per_pkg << 16;
  *edx |= CPUID_HT;
  }
  if (!cpu->enable_pmu) {
@@ -5293,10 +5297,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   */
  if (*eax & 31) {
  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
-int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
-if (cs->nr_cores > 1) {
+int vcpus_per_socket = cpus_per_pkg;

Would it make sense to directly use cpus_per_pkg here?

+int cores_per_socket = topo_info.cores_per_die *
+   topo_info.dies_per_pkg;

There are other places in cpu_x86_cpuid where cs->nr_cores is used
separately, why not make a global "cores_per_pkg" like cpus_per_pkg
and also tweak the other places?

+if (cores_per_socket > 1) {
  *eax &= ~0xFC000000;
-*eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
+*eax |= (pow2ceil(cores_per_socket) - 1) << 26;
  }
  if (host_vcpus_per_cache > vcpus_per_socket) {
  *eax &= ~0x3FFC000;
@@ -5436,12 +5442,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  switch (count) {
  case 0:
  *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;

There are many other places in cpu_x86_cpuid where cs->nr_threads
is used separately, such as encode_cache_cpuid4(***), should we
replace them all?

  *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
  break;
  case 1:
  *eax = apicid_pkg_offset(&topo_info);
-*ebx = cs->nr_cores * cs->nr_threads;
+*ebx = cpus_per_pkg;
  *ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
  break;
  default:
@@ -5472,7 +5478,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  switch (count) {
  case 0:
  *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;
  *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
  break;
  case 1:
@@ -5482,7 +5488,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  break;
  case 2:
  *eax = apicid_pkg_offset(&topo_info);
-*ebx = cs->nr_cores * cs->nr_threads;
+*ebx = cpus_per_pkg;
  *ecx |= CPUID_TOPOLOGY_LEVEL_DIE;
  break;
  default:
@@ -5707,7 +5713,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   * discards multiple thread information if it is set.
   * So don't set it here for Intel to make Linux guests happy.
   */
-if (cs->nr_cores * cs->nr_threads > 1) {
+if (cpus_per_pkg > 1) {
  if (env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1 ||
  env->cpuid_vendor2 != CPUID_VENDOR_INTEL_2 ||
  env->cpuid_vendor3 != CPUID_VENDOR_INTEL_3) {
@@ -5769,7 +5775,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
   *eax |= 

[PATCH RESEND 05/18] i386/cpu: Consolidate the use of topo_info in cpu_x86_cpuid()

2023-02-13 Thread Zhao Liu
From: Zhao Liu 

In cpu_x86_cpuid(), there are many variables representing the cpu
topology, e.g., topo_info, cs->nr_cores/cs->nr_threads.

Since the names of cs->nr_cores/cs->nr_threads do not accurately
represent their meaning, the use of cs->nr_cores/cs->nr_threads is prone
to confusion and mistakes.

And the structure X86CPUTopoInfo names its members clearly, thus the
variable "topo_info" should be preferred.

Suggested-by: Robert Hoo 
Signed-off-by: Zhao Liu 
---
 target/i386/cpu.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7833505092d8..4cda84eb96f1 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5215,11 +5215,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 uint32_t limit;
 uint32_t signature[3];
 X86CPUTopoInfo topo_info;
+uint32_t cpus_per_pkg;
 
 topo_info.dies_per_pkg = env->nr_dies;
 topo_info.cores_per_die = cs->nr_cores / env->nr_dies;
 topo_info.threads_per_core = cs->nr_threads;
 
+cpus_per_pkg = topo_info.dies_per_pkg * topo_info.cores_per_die *
+   topo_info.threads_per_core;
+
 /* Calculate & apply limits for different index ranges */
 if (index >= 0xC000) {
 limit = env->cpuid_xlevel2;
@@ -5255,8 +5259,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx |= CPUID_EXT_OSXSAVE;
 }
 *edx = env->features[FEAT_1_EDX];
-if (cs->nr_cores * cs->nr_threads > 1) {
-*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
+if (cpus_per_pkg > 1) {
+*ebx |= cpus_per_pkg << 16;
 *edx |= CPUID_HT;
 }
 if (!cpu->enable_pmu) {
@@ -5293,10 +5297,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  */
 if (*eax & 31) {
 int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
-int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
-if (cs->nr_cores > 1) {
+int vcpus_per_socket = cpus_per_pkg;
+int cores_per_socket = topo_info.cores_per_die *
+   topo_info.dies_per_pkg;
+if (cores_per_socket > 1) {
 *eax &= ~0xFC000000;
-*eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
+*eax |= (pow2ceil(cores_per_socket) - 1) << 26;
 }
 if (host_vcpus_per_cache > vcpus_per_socket) {
 *eax &= ~0x3FFC000;
@@ -5436,12 +5442,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 switch (count) {
 case 0:
 *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;
 *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
 break;
 case 1:
 *eax = apicid_pkg_offset(&topo_info);
-*ebx = cs->nr_cores * cs->nr_threads;
+*ebx = cpus_per_pkg;
 *ecx |= CPUID_TOPOLOGY_LEVEL_CORE;
 break;
 default:
@@ -5472,7 +5478,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 switch (count) {
 case 0:
 *eax = apicid_core_offset(&topo_info);
-*ebx = cs->nr_threads;
+*ebx = topo_info.threads_per_core;
 *ecx |= CPUID_TOPOLOGY_LEVEL_SMT;
 break;
 case 1:
@@ -5482,7 +5488,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 break;
 case 2:
 *eax = apicid_pkg_offset(&topo_info);
-*ebx = cs->nr_cores * cs->nr_threads;
+*ebx = cpus_per_pkg;
 *ecx |= CPUID_TOPOLOGY_LEVEL_DIE;
 break;
 default:
@@ -5707,7 +5713,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  * discards multiple thread information if it is set.
  * So don't set it here for Intel to make Linux guests happy.
  */
-if (cs->nr_cores * cs->nr_threads > 1) {
+if (cpus_per_pkg > 1) {
 if (env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1 ||
 env->cpuid_vendor2 != CPUID_VENDOR_INTEL_2 ||
 env->cpuid_vendor3 != CPUID_VENDOR_INTEL_3) {
@@ -5769,7 +5775,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  *eax |= (cpu_x86_virtual_addr_width(env) << 8);
 }
 *ebx = env->features[FEAT_8000_0008_EBX];
-if (cs->nr_cores * cs->nr_threads > 1) {
+if (cpus_per_pkg > 1) {
 /*
  * Bits 15:12 is "The number of bits in the initial
  * Core::X86::Apic::ApicId[ApicId] value that indicate
@@ -5777,7 +5783,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  * Bits 7:0 is "The number of