Re: [PATCH v2] ppc/pnv: Improve pervasive topology calculation for big-core

2024-02-27 Thread Cédric Le Goater

On 2/27/24 21:36, Caleb Schlossin wrote:

Big (SMT8) cores have a complicated function to map the core, thread ID
to pervasive topology (PIR). Fix this for power8, power9, and power10.

Signed-off-by: Caleb Schlossin 
---

Version 2 fixes the PIR calculation for core, thread ID
for power10 big cores (SMT8).


Looks good for SMT4, and this change prepares the ground for SMT8. We would
need a new CPU definition to activate big cores. It can come later.

Reviewed-by: Cédric Le Goater 

Thanks,

C.





  include/hw/ppc/pnv_chip.h |  2 +-
  include/hw/ppc/pnv_core.h |  1 +
  hw/ppc/pnv.c  | 71 ---
  hw/ppc/pnv_core.c |  8 ++---
  target/ppc/misc_helper.c  |  3 --
  5 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index af4cd7a8b8..8589f3291e 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -147,7 +147,7 @@ struct PnvChipClass {
  
  DeviceRealize parent_realize;
  
-uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id);

+uint32_t (*chip_pir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id);
  void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp);
  void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu);
  void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu);
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 4db21229a6..c6d62fd145 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -36,6 +36,7 @@ struct PnvCore {
  /*< public >*/
  PowerPCCPU **threads;
  uint32_t pir;
+uint32_t hwid;
  uint64_t hrmor;
  PnvChip *chip;
  
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c

index 0b47b92baa..aa5aba60b4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -141,8 +141,10 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
  int smt_threads = CPU_CORE(pc)->nr_threads;
  CPUPPCState *env = &cpu->env;
  PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip);
  g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads);
  int i;
+uint32_t pir;
  uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
 0xffffffff, 0xffffffff};
  uint32_t tbfreq = PNV_TIMEBASE_FREQ;
@@ -158,15 +160,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
  char *nodename;
  int cpus_offset = get_cpus_node(fdt);
  
-nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);

+pir = pnv_cc->chip_pir(chip, pc->hwid, 0);
+
+nodename = g_strdup_printf("%s@%x", dc->fw_name, pir);
  offset = fdt_add_subnode(fdt, cpus_offset, nodename);
  _FDT(offset);
  g_free(nodename);
  
  _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
  
-_FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));

-_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "reg", pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pir)));
  _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
  
  _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));

@@ -241,15 +245,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
  
  /* Build interrupt servers properties */

  for (i = 0; i < smt_threads; i++) {
-servers_prop[i] = cpu_to_be32(pc->pir + i);
+servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i));
  }
  _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
 servers_prop, sizeof(*servers_prop) * smt_threads)));
  }
  
-static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,

+static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t hwid,
 uint32_t nr_threads)
  {
+PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+uint32_t pir = pcc->chip_pir(chip, hwid, 0);
  uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
  char *name;
  const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
@@ -263,6 +269,7 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
  rsize = sizeof(uint64_t) * 2 * nr_threads;
  reg = g_malloc(rsize);
  for (i = 0; i < nr_threads; i++) {
+/* We know P8 PIR is linear with thread id */
  reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
  reg[i * 2 + 1] = cpu_to_be64(0x1000);
  }
@@ -315,7 +322,7 @@ static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt)
  pnv_dt_core(chip, pnv_core, fdt);
  
  /* Interrupt Control Presenters (ICP). One per core. */

-pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads);
+pnv_dt_icp(chip, fdt, pnv_core->hwid, CPU_CORE(pnv_core)->nr_threads);
  }
  
  if (chip->ram_size) {

@@ -995,9 +1002,10 @@ static void pnv_init(MachineState *machine)
   *   25:28  Core 

[PATCH v2] ppc/pnv: Improve pervasive topology calculation for big-core

2024-02-27 Thread Caleb Schlossin
Big (SMT8) cores have a complicated function to map the core, thread ID
to pervasive topology (PIR). Fix this for power8, power9, and power10.

Signed-off-by: Caleb Schlossin 
---

Version 2 fixes the PIR calculation for core, thread ID
for power10 big cores (SMT8).

 include/hw/ppc/pnv_chip.h |  2 +-
 include/hw/ppc/pnv_core.h |  1 +
 hw/ppc/pnv.c  | 71 ---
 hw/ppc/pnv_core.c |  8 ++---
 target/ppc/misc_helper.c  |  3 --
 5 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index af4cd7a8b8..8589f3291e 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -147,7 +147,7 @@ struct PnvChipClass {
 
 DeviceRealize parent_realize;
 
-uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id);
+uint32_t (*chip_pir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id);
 void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp);
 void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu);
 void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu);
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 4db21229a6..c6d62fd145 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -36,6 +36,7 @@ struct PnvCore {
 /*< public >*/
 PowerPCCPU **threads;
 uint32_t pir;
+uint32_t hwid;
 uint64_t hrmor;
 PnvChip *chip;
 
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 0b47b92baa..aa5aba60b4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -141,8 +141,10 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
 int smt_threads = CPU_CORE(pc)->nr_threads;
 CPUPPCState *env = &cpu->env;
 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip);
 g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads);
 int i;
+uint32_t pir;
 uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
0xffffffff, 0xffffffff};
 uint32_t tbfreq = PNV_TIMEBASE_FREQ;
@@ -158,15 +160,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
 char *nodename;
 int cpus_offset = get_cpus_node(fdt);
 
-nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
+pir = pnv_cc->chip_pir(chip, pc->hwid, 0);
+
+nodename = g_strdup_printf("%s@%x", dc->fw_name, pir);
 offset = fdt_add_subnode(fdt, cpus_offset, nodename);
 _FDT(offset);
 g_free(nodename);
 
 _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
 
-_FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
-_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "reg", pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pir)));
 _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
 
 _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
@@ -241,15 +245,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
 
 /* Build interrupt servers properties */
 for (i = 0; i < smt_threads; i++) {
-servers_prop[i] = cpu_to_be32(pc->pir + i);
+servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i));
 }
 _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
servers_prop, sizeof(*servers_prop) * smt_threads)));
 }
 
-static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
+static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t hwid,
uint32_t nr_threads)
 {
+PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+uint32_t pir = pcc->chip_pir(chip, hwid, 0);
 uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
 char *name;
 const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
@@ -263,6 +269,7 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
 rsize = sizeof(uint64_t) * 2 * nr_threads;
 reg = g_malloc(rsize);
 for (i = 0; i < nr_threads; i++) {
+/* We know P8 PIR is linear with thread id */
 reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
 reg[i * 2 + 1] = cpu_to_be64(0x1000);
 }
@@ -315,7 +322,7 @@ static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt)
 pnv_dt_core(chip, pnv_core, fdt);
 
 /* Interrupt Control Presenters (ICP). One per core. */
-pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads);
+pnv_dt_icp(chip, fdt, pnv_core->hwid, CPU_CORE(pnv_core)->nr_threads);
 }
 
 if (chip->ram_size) {
@@ -995,9 +1002,10 @@ static void pnv_init(MachineState *machine)
  *   25:28  Core number
  *   29:31  Thread ID
  */
-static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
+static uint32_t pnv_chip_pir_p8(PnvChip *chip, uint32_t core_id,
+uint32_t thread_id)
 {
-return (chip->chip_id << 7) | (core_id << 3);
+return (chip->chip_id