Re: [PATCH v5 5/5] hw/riscv: virt: Allow creating multiple NUMA sockets
On Fri, May 29, 2020 at 4:50 AM Anup Patel wrote:
>
> We extend RISC-V virt machine to allow creating a multi-socket
> machine. Each RISC-V virt machine socket is a NUMA node having
> a set of HARTs, a memory instance, a CLINT instance, and a PLIC
> instance. Other devices are shared between all sockets. We also
> update the generated device tree accordingly.
>
> By default, NUMA multi-socket support is disabled for RISC-V virt
> machine. To enable it, users can use "-numa" command-line options
> of QEMU.
>
> Example1: For two NUMA nodes with 2 CPUs each, append following
> to command-line options: "-smp 4 -numa node -numa node"
>
> Example2: For two NUMA nodes with 1 and 3 CPUs, append following
> to command-line options:
> "-smp 4 -numa node -numa node -numa cpu,node-id=0,core-id=0 \
>  -numa cpu,node-id=1,core-id=1 -numa cpu,node-id=1,core-id=2 \
>  -numa cpu,node-id=1,core-id=3"
>
> The maximum number of sockets in a RISC-V virt machine is 8
> but this limit can be changed in future.
>
> Signed-off-by: Anup Patel
> ---
>  hw/riscv/virt.c         | 530 +++-
>  include/hw/riscv/virt.h |   9 +-
>  2 files changed, 308 insertions(+), 231 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 421815081d..2863b42cea 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -35,6 +35,7 @@
>  #include "hw/riscv/sifive_test.h"
>  #include "hw/riscv/virt.h"
>  #include "hw/riscv/boot.h"
> +#include "hw/riscv/numa.h"
>  #include "chardev/char.h"
>  #include "sysemu/arch_init.h"
>  #include "sysemu/device_tree.h"
> @@ -60,7 +61,7 @@ static const struct MemmapEntry {
>      [VIRT_TEST] =      {   0x100000,     0x1000 },
>      [VIRT_RTC] =       {   0x101000,     0x1000 },
>      [VIRT_CLINT] =     {  0x2000000,    0x10000 },
> -    [VIRT_PLIC] =      {  0xc000000,  0x4000000 },
> +    [VIRT_PLIC] =      {  0xc000000,  VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
>      [VIRT_UART0] =     { 0x10000000,      0x100 },
>      [VIRT_VIRTIO] =    { 0x10001000,     0x1000 },
>      [VIRT_FLASH] =     { 0x20000000,  0x4000000 },
> @@ -182,10 +183,17 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
>                         uint64_t mem_size, const char *cmdline)
>  {
>      void *fdt;
> -    int cpu, i;
> -    uint32_t *cells;
> -    char *nodename;
> -    uint32_t plic_phandle, test_phandle, phandle = 1;
> +    int i, cpu, socket;
> +    MachineState *mc = MACHINE(s);
> +    uint64_t addr, size;
> +    uint32_t *clint_cells, *plic_cells;
> +    unsigned long clint_addr, plic_addr;
> +    uint32_t plic_phandle[MAX_NODES];
> +    uint32_t cpu_phandle, intc_phandle, test_phandle;
> +    uint32_t phandle = 1, plic_mmio_phandle = 1;
> +    uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1;
> +    char *mem_name, *cpu_name, *core_name, *intc_name;
> +    char *name, *clint_name, *plic_name, *clust_name;
>      hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
>      hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
>
> @@ -206,231 +214,238 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
>      qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2);
>      qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2);
>
> -    nodename = g_strdup_printf("/memory@%lx",
> -        (long)memmap[VIRT_DRAM].base);
> -    qemu_fdt_add_subnode(fdt, nodename);
> -    qemu_fdt_setprop_cells(fdt, nodename, "reg",
> -        memmap[VIRT_DRAM].base >> 32, memmap[VIRT_DRAM].base,
> -        mem_size >> 32, mem_size);
> -    qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
> -    g_free(nodename);
> -
>      qemu_fdt_add_subnode(fdt, "/cpus");
>      qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency",
>                            SIFIVE_CLINT_TIMEBASE_FREQ);
>      qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0);
>      qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1);
> +    qemu_fdt_add_subnode(fdt, "/cpus/cpu-map");
> +
> +    for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
> +        qemu_fdt_add_subnode(fdt, clust_name);
> +
> +        plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
> +        clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
> +
> +        for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
> +            cpu_phandle = phandle++;
>
> -    for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) {
> -        int cpu_phandle = phandle++;
> -        int intc_phandle;
> -        nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
> -        char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu);
> -        char *isa = riscv_isa_string(&s->soc.harts[cpu]);
> -        qemu_fdt_add_subnode(fdt, nodename);
> +            cpu_name = g_strdup_printf("/cpus/cpu@%d",
> +                s->soc[socket].hartid_base + cpu);
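A note for readers following the diff: the plic_cells/clint_cells arrays allocated above hold four 32-bit cells per hart, which later become the "interrupts-extended" properties of the per-socket CLINT and PLIC nodes. The fragment below is a minimal sketch of how such an array is typically filled and attached; it assumes it runs inside the per-hart loop of create_fdt() with intc_phandle and clint_name already set up, and it is an illustration, not a verbatim excerpt from the patch.

    /* Sketch only: four cells per hart, consumed by the socket's CLINT node.
     * IRQ_M_SOFT / IRQ_M_TIMER come from target/riscv/cpu_bits.h. */
    clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle);
    clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT);
    clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle);
    clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER);

    /* After the hart loop finishes, the whole array is attached as one
     * property of the CLINT node created for this socket. */
    qemu_fdt_setprop(fdt, clint_name, "interrupts-extended",
                     clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);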
RE: [PATCH v5 5/5] hw/riscv: virt: Allow creating multiple NUMA sockets
> -----Original Message-----
> From: Alistair Francis
> Sent: 11 June 2020 04:55
> To: Anup Patel
> Cc: Peter Maydell ; Palmer Dabbelt ; Alistair Francis ;
>     Sagar Karandikar ; Atish Patra ; open list:RISC-V ;
>     qemu-devel@nongnu.org Developers ; Anup Patel
> Subject: Re: [PATCH v5 5/5] hw/riscv: virt: Allow creating multiple NUMA sockets
>
> On Fri, May 29, 2020 at 4:49 AM Anup Patel wrote:
> >
> > We extend RISC-V virt machine to allow creating a multi-socket
> > machine. Each RISC-V virt machine socket is a NUMA node having a set
> > of HARTs, a memory instance, a CLINT instance, and a PLIC instance.
> > Other devices are shared between all sockets. We also update the
> > generated device tree accordingly.
> >
> > By default, NUMA multi-socket support is disabled for RISC-V virt
> > machine. To enable it, users can use "-numa" command-line options of
> > QEMU.
> >
> > Example1: For two NUMA nodes with 2 CPUs each, append following to
> > command-line options: "-smp 4 -numa node -numa node"
> >
> > Example2: For two NUMA nodes with 1 and 3 CPUs, append following to
> > command-line options:
> > "-smp 4 -numa node -numa node -numa cpu,node-id=0,core-id=0 \
> >  -numa cpu,node-id=1,core-id=1 -numa cpu,node-id=1,core-id=2 \
> >  -numa cpu,node-id=1,core-id=3"
> >
> > The maximum number of sockets in a RISC-V virt machine is 8 but this
> > limit can be changed in future.
> >
> > Signed-off-by: Anup Patel
> > ---
> >  hw/riscv/virt.c         | 530 +++-
> >  include/hw/riscv/virt.h |   9 +-
> >  2 files changed, 308 insertions(+), 231 deletions(-)
> >
> > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > index 421815081d..2863b42cea 100644
> > --- a/hw/riscv/virt.c
> > +++ b/hw/riscv/virt.c
> > @@ -35,6 +35,7 @@
> >  #include "hw/riscv/sifive_test.h"
> >  #include "hw/riscv/virt.h"
> >  #include "hw/riscv/boot.h"
> > +#include "hw/riscv/numa.h"
> >  #include "chardev/char.h"
> >  #include "sysemu/arch_init.h"
> >  #include "sysemu/device_tree.h"
> > @@ -60,7 +61,7 @@ static const struct MemmapEntry {
> >      [VIRT_TEST] =      {   0x100000,     0x1000 },
> >      [VIRT_RTC] =       {   0x101000,     0x1000 },
> >      [VIRT_CLINT] =     {  0x2000000,    0x10000 },
> > -    [VIRT_PLIC] =      {  0xc000000,  0x4000000 },
> > +    [VIRT_PLIC] =      {  0xc000000,  VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
> >      [VIRT_UART0] =     { 0x10000000,      0x100 },
> >      [VIRT_VIRTIO] =    { 0x10001000,     0x1000 },
> >      [VIRT_FLASH] =     { 0x20000000,  0x4000000 },
> > @@ -182,10 +183,17 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
> >                         uint64_t mem_size, const char *cmdline)
> >  {
> >      void *fdt;
> > -    int cpu, i;
> > -    uint32_t *cells;
> > -    char *nodename;
> > -    uint32_t plic_phandle, test_phandle, phandle = 1;
> > +    int i, cpu, socket;
> > +    MachineState *mc = MACHINE(s);
> > +    uint64_t addr, size;
> > +    uint32_t *clint_cells, *plic_cells;
> > +    unsigned long clint_addr, plic_addr;
> > +    uint32_t plic_phandle[MAX_NODES];
> > +    uint32_t cpu_phandle, intc_phandle, test_phandle;
> > +    uint32_t phandle = 1, plic_mmio_phandle = 1;
> > +    uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1;
> > +    char *mem_name, *cpu_name, *core_name, *intc_name;
> > +    char *name, *clint_name, *plic_name, *clust_name;
> >      hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
> >      hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
> >
> > @@ -206,231 +214,238 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
> >      qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2);
> >      qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2);
> >
> > -    nodename = g_strdup_printf("/memory@%lx",
> > -        (long)memmap[VIRT_DRAM].base);
> > -    qemu_fdt_add_subnode(fdt, nodename);
> > -    qemu_fdt_setprop_cells(fdt, nodename, "reg",
> > -        memmap[VIRT_DRAM].base >> 32, memmap[VIRT_DRAM].base,
> > -        mem_size >> 32, mem_size);
> > -    qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
> > -    g_free(nodename);
> > -
> >      qemu_fdt_add_subnode(fdt, "/cpus");
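On the memmap change quoted above: VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) sizes the PLIC MMIO window by interrupt-context count, with two contexts (one M-mode, one S-mode) per hart. A minimal sketch of how such a macro can be defined in include/hw/riscv/virt.h is shown below; the constants follow the usual SiFive PLIC context layout and should be treated as assumptions rather than values quoted from this series.

    /* Sketch: size the PLIC region so it covers the last hart context. */
    #define VIRT_PLIC_CONTEXT_BASE    0x200000   /* offset of the first context block */
    #define VIRT_PLIC_CONTEXT_STRIDE  0x1000     /* one block per context */
    #define VIRT_PLIC_SIZE(__num_context) \
        (VIRT_PLIC_CONTEXT_BASE + (__num_context) * VIRT_PLIC_CONTEXT_STRIDE)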
Re: [PATCH v5 5/5] hw/riscv: virt: Allow creating multiple NUMA sockets
On Fri, May 29, 2020 at 4:49 AM Anup Patel wrote:
>
> We extend RISC-V virt machine to allow creating a multi-socket
> machine. Each RISC-V virt machine socket is a NUMA node having
> a set of HARTs, a memory instance, a CLINT instance, and a PLIC
> instance. Other devices are shared between all sockets. We also
> update the generated device tree accordingly.
>
> By default, NUMA multi-socket support is disabled for RISC-V virt
> machine. To enable it, users can use "-numa" command-line options
> of QEMU.
>
> Example1: For two NUMA nodes with 2 CPUs each, append following
> to command-line options: "-smp 4 -numa node -numa node"
>
> Example2: For two NUMA nodes with 1 and 3 CPUs, append following
> to command-line options:
> "-smp 4 -numa node -numa node -numa cpu,node-id=0,core-id=0 \
>  -numa cpu,node-id=1,core-id=1 -numa cpu,node-id=1,core-id=2 \
>  -numa cpu,node-id=1,core-id=3"
>
> The maximum number of sockets in a RISC-V virt machine is 8
> but this limit can be changed in future.
>
> Signed-off-by: Anup Patel
> ---
>  hw/riscv/virt.c         | 530 +++-
>  include/hw/riscv/virt.h |   9 +-
>  2 files changed, 308 insertions(+), 231 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 421815081d..2863b42cea 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -35,6 +35,7 @@
>  #include "hw/riscv/sifive_test.h"
>  #include "hw/riscv/virt.h"
>  #include "hw/riscv/boot.h"
> +#include "hw/riscv/numa.h"
>  #include "chardev/char.h"
>  #include "sysemu/arch_init.h"
>  #include "sysemu/device_tree.h"
> @@ -60,7 +61,7 @@ static const struct MemmapEntry {
>      [VIRT_TEST] =      {   0x100000,     0x1000 },
>      [VIRT_RTC] =       {   0x101000,     0x1000 },
>      [VIRT_CLINT] =     {  0x2000000,    0x10000 },
> -    [VIRT_PLIC] =      {  0xc000000,  0x4000000 },
> +    [VIRT_PLIC] =      {  0xc000000,  VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
>      [VIRT_UART0] =     { 0x10000000,      0x100 },
>      [VIRT_VIRTIO] =    { 0x10001000,     0x1000 },
>      [VIRT_FLASH] =     { 0x20000000,  0x4000000 },
> @@ -182,10 +183,17 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
>                         uint64_t mem_size, const char *cmdline)
>  {
>      void *fdt;
> -    int cpu, i;
> -    uint32_t *cells;
> -    char *nodename;
> -    uint32_t plic_phandle, test_phandle, phandle = 1;
> +    int i, cpu, socket;
> +    MachineState *mc = MACHINE(s);
> +    uint64_t addr, size;
> +    uint32_t *clint_cells, *plic_cells;
> +    unsigned long clint_addr, plic_addr;
> +    uint32_t plic_phandle[MAX_NODES];
> +    uint32_t cpu_phandle, intc_phandle, test_phandle;
> +    uint32_t phandle = 1, plic_mmio_phandle = 1;
> +    uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1;
> +    char *mem_name, *cpu_name, *core_name, *intc_name;
> +    char *name, *clint_name, *plic_name, *clust_name;
>      hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
>      hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
>
> @@ -206,231 +214,238 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
>      qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2);
>      qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2);
>
> -    nodename = g_strdup_printf("/memory@%lx",
> -        (long)memmap[VIRT_DRAM].base);
> -    qemu_fdt_add_subnode(fdt, nodename);
> -    qemu_fdt_setprop_cells(fdt, nodename, "reg",
> -        memmap[VIRT_DRAM].base >> 32, memmap[VIRT_DRAM].base,
> -        mem_size >> 32, mem_size);
> -    qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
> -    g_free(nodename);
> -
>      qemu_fdt_add_subnode(fdt, "/cpus");
>      qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency",
>                            SIFIVE_CLINT_TIMEBASE_FREQ);
>      qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0);
>      qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1);
> +    qemu_fdt_add_subnode(fdt, "/cpus/cpu-map");

I'm no expert with cpu-map. Do you mind CCing Atish in the next version and see if he can Ack these DT changes?

> +
> +    for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
> +        qemu_fdt_add_subnode(fdt, clust_name);
> +
> +        plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
> +        clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
> +
> +        for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
> +            cpu_phandle = phandle++;
>
> -    for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) {
> -        int cpu_phandle = phandle++;
> -        int intc_phandle;
> -        nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
> -        char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu);
> -        char *isa = riscv_isa_string(&s->soc.harts[cpu]);
> -        qemu_fdt_add_subnode(fdt, nodename);
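For background on the cpu-map question: the loop quoted above creates one cluster<socket> node per socket under /cpus/cpu-map, and inside it one core<n> node per hart whose "cpu" property is the phandle of the matching /cpus/cpu@<hartid> node. Below is a minimal sketch of that per-core step, assuming the surrounding loop variables from create_fdt(); it is illustrative, not a verbatim excerpt from the patch.

    /* Sketch: /cpus/cpu-map/cluster<socket>/core<cpu> -> cpu node phandle. */
    core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
    qemu_fdt_add_subnode(fdt, core_name);
    qemu_fdt_setprop_cell(fdt, core_name, "cpu", cpu_phandle);
    g_free(core_name);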
[PATCH v5 5/5] hw/riscv: virt: Allow creating multiple NUMA sockets
We extend RISC-V virt machine to allow creating a multi-socket
machine. Each RISC-V virt machine socket is a NUMA node having
a set of HARTs, a memory instance, a CLINT instance, and a PLIC
instance. Other devices are shared between all sockets. We also
update the generated device tree accordingly.

By default, NUMA multi-socket support is disabled for RISC-V virt
machine. To enable it, users can use "-numa" command-line options
of QEMU.

Example1: For two NUMA nodes with 2 CPUs each, append following
to command-line options: "-smp 4 -numa node -numa node"

Example2: For two NUMA nodes with 1 and 3 CPUs, append following
to command-line options:
"-smp 4 -numa node -numa node -numa cpu,node-id=0,core-id=0 \
 -numa cpu,node-id=1,core-id=1 -numa cpu,node-id=1,core-id=2 \
 -numa cpu,node-id=1,core-id=3"

The maximum number of sockets in a RISC-V virt machine is 8
but this limit can be changed in future.

Signed-off-by: Anup Patel
---
 hw/riscv/virt.c         | 530 +++-
 include/hw/riscv/virt.h |   9 +-
 2 files changed, 308 insertions(+), 231 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 421815081d..2863b42cea 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -35,6 +35,7 @@
 #include "hw/riscv/sifive_test.h"
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
+#include "hw/riscv/numa.h"
 #include "chardev/char.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/device_tree.h"
@@ -60,7 +61,7 @@ static const struct MemmapEntry {
     [VIRT_TEST] =      {   0x100000,     0x1000 },
     [VIRT_RTC] =       {   0x101000,     0x1000 },
     [VIRT_CLINT] =     {  0x2000000,    0x10000 },
-    [VIRT_PLIC] =      {  0xc000000,  0x4000000 },
+    [VIRT_PLIC] =      {  0xc000000,  VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
     [VIRT_UART0] =     { 0x10000000,      0x100 },
     [VIRT_VIRTIO] =    { 0x10001000,     0x1000 },
     [VIRT_FLASH] =     { 0x20000000,  0x4000000 },
@@ -182,10 +183,17 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
                        uint64_t mem_size, const char *cmdline)
 {
     void *fdt;
-    int cpu, i;
-    uint32_t *cells;
-    char *nodename;
-    uint32_t plic_phandle, test_phandle, phandle = 1;
+    int i, cpu, socket;
+    MachineState *mc = MACHINE(s);
+    uint64_t addr, size;
+    uint32_t *clint_cells, *plic_cells;
+    unsigned long clint_addr, plic_addr;
+    uint32_t plic_phandle[MAX_NODES];
+    uint32_t cpu_phandle, intc_phandle, test_phandle;
+    uint32_t phandle = 1, plic_mmio_phandle = 1;
+    uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1;
+    char *mem_name, *cpu_name, *core_name, *intc_name;
+    char *name, *clint_name, *plic_name, *clust_name;
     hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
     hwaddr flashbase = virt_memmap[VIRT_FLASH].base;

@@ -206,231 +214,238 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap,
     qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2);
     qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2);

-    nodename = g_strdup_printf("/memory@%lx",
-        (long)memmap[VIRT_DRAM].base);
-    qemu_fdt_add_subnode(fdt, nodename);
-    qemu_fdt_setprop_cells(fdt, nodename, "reg",
-        memmap[VIRT_DRAM].base >> 32, memmap[VIRT_DRAM].base,
-        mem_size >> 32, mem_size);
-    qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
-    g_free(nodename);
-
     qemu_fdt_add_subnode(fdt, "/cpus");
     qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency",
                           SIFIVE_CLINT_TIMEBASE_FREQ);
     qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0);
     qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1);
+    qemu_fdt_add_subnode(fdt, "/cpus/cpu-map");
+
+    for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
+        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
+        qemu_fdt_add_subnode(fdt, clust_name);
+
+        plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
+        clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4);
+
+        for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
+            cpu_phandle = phandle++;

-    for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) {
-        int cpu_phandle = phandle++;
-        int intc_phandle;
-        nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
-        char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu);
-        char *isa = riscv_isa_string(&s->soc.harts[cpu]);
-        qemu_fdt_add_subnode(fdt, nodename);
+            cpu_name = g_strdup_printf("/cpus/cpu@%d",
+                s->soc[socket].hartid_base + cpu);
+            qemu_fdt_add_subnode(fdt, cpu_name);
 #if defined(TARGET_RISCV32)
-        qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv32");
+            qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv32");
 #else
-        qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv48");
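The hunk above removes the single flat /memory node; further down in the patch (beyond the point where this quote is cut off) each socket instead gets its own memory node tagged with a numa-node-id. The sketch below shows that shape. It assumes the riscv_socket_mem_offset(), riscv_socket_mem_size() and riscv_socket_fdt_write_id() helpers added by the hw/riscv/numa.h patch earlier in this series; check that header for the exact signatures.

    /* Sketch: one /memory@<addr> node per socket, tagged with its NUMA node id. */
    for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
        addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket);
        size = riscv_socket_mem_size(mc, socket);
        mem_name = g_strdup_printf("/memory@%lx", (long)addr);
        qemu_fdt_add_subnode(fdt, mem_name);
        qemu_fdt_setprop_cells(fdt, mem_name, "reg",
            addr >> 32, addr, size >> 32, size);
        qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory");
        riscv_socket_fdt_write_id(mc, fdt, mem_name, socket);
        g_free(mem_name);
    }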