Empty NUMA nodes, where no memory resides, aren't exposed through the ACPI SRAT table. This isn't the behaviour users expect: as the example below shows, the corresponding memory node devices are missing from the guest kernel, so the guest doesn't see the node topology the user specified. Memory can still be hot added to these empty NUMA nodes while they're not exposed, but it then shows up under a different node ID than the one requested.
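For context, the MEM_AFFINITY_* values used by the patch below are QEMU's encoding of the flag bits in the ACPI SRAT Memory Affinity Structure. They are defined in include/hw/acpi/acpi-defs.h along these lines (paraphrased here for reference; see the header for the authoritative definition, and the ACPI spec for the bit assignments):

    typedef enum {
        MEM_AFFINITY_NOFLAGS      = 0,
        MEM_AFFINITY_ENABLED      = (1 << 0), /* entry is usable        */
        MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), /* memory can be hot added */
        MEM_AFFINITY_NON_VOLATILE = (1 << 2), /* range is non-volatile  */
    } MemoryAffinityFlags;

The following example shows the behaviour before this change: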
  /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \
  -accel kvm -machine virt,gic-version=host               \
  -cpu host -smp 4,sockets=2,cores=2,threads=1            \
  -m 1024M,slots=16,maxmem=64G                            \
  -object memory-backend-ram,id=mem0,size=512M            \
  -object memory-backend-ram,id=mem1,size=512M            \
  -numa node,nodeid=0,cpus=0-1,memdev=mem0                \
  -numa node,nodeid=1,cpus=2-3,memdev=mem1                \
  -numa node,nodeid=2                                     \
  -numa node,nodeid=3                                     \
  :

  guest# ls /sys/devices/system/node | grep node
  node0  node1

  (qemu) object_add memory-backend-ram,id=hp-mem0,size=1G
  (qemu) device_add pc-dimm,id=hp-dimm0,node=3,memdev=hp-mem0

  guest# ls /sys/devices/system/node | grep node
  node0  node1  node2
  guest# cat /sys/devices/system/node/node2/meminfo | grep MemTotal
  Node 2 MemTotal:       1048576 kB

This exposes the empty NUMA nodes through the ACPI SRAT table. With it applied, the corresponding memory node devices can be found in the guest, and hot-added memory lands on the node the user asked for. Note that the hotpluggable capability is explicitly given to these empty NUMA nodes for the sake of completeness.

  guest# ls /sys/devices/system/node | grep node
  node0  node1  node2  node3
  guest# cat /sys/devices/system/node/node3/meminfo | grep MemTotal
  Node 3 MemTotal:       0 kB

  (qemu) object_add memory-backend-ram,id=hp-mem0,size=1G
  (qemu) device_add pc-dimm,id=hp-dimm0,node=3,memdev=hp-mem0

  guest# cat /sys/devices/system/node/node3/meminfo | grep MemTotal
  Node 3 MemTotal:       1048576 kB

Signed-off-by: Gavin Shan <gs...@redhat.com>
Reviewed-by: Andrew Jones <drjo...@redhat.com>
---
v2: Improved commit log as suggested by Drew and Igor.
---
 hw/arm/virt-acpi-build.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 674f902652..a4c95b2f64 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -526,6 +526,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(ms);
     AcpiTable table = { .sig = "SRAT", .rev = 3, .oem_id = vms->oem_id,
                         .oem_table_id = vms->oem_table_id };
+    MemoryAffinityFlags flags;
 
     acpi_table_begin(&table, table_data);
     build_append_int_noprefix(table_data, 1, 4); /* Reserved */
@@ -547,12 +548,15 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 
     mem_base = vms->memmap[VIRT_MEM].base;
     for (i = 0; i < ms->numa_state->num_nodes; ++i) {
-        if (ms->numa_state->nodes[i].node_mem > 0) {
-            build_srat_memory(table_data, mem_base,
-                              ms->numa_state->nodes[i].node_mem, i,
-                              MEM_AFFINITY_ENABLED);
-            mem_base += ms->numa_state->nodes[i].node_mem;
+        if (ms->numa_state->nodes[i].node_mem) {
+            flags = MEM_AFFINITY_ENABLED;
+        } else {
+            flags = MEM_AFFINITY_ENABLED | MEM_AFFINITY_HOTPLUGGABLE;
         }
+
+        build_srat_memory(table_data, mem_base,
+                          ms->numa_state->nodes[i].node_mem, i, flags);
+        mem_base += ms->numa_state->nodes[i].node_mem;
     }
 
     if (ms->nvdimms_state->is_enabled) {
-- 
2.23.0
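For illustration only (not QEMU code): a minimal, self-contained C sketch of the 40-byte Memory Affinity Structure (ACPI spec, type 1) that the patched loop emits for an empty node, i.e. a zero-length range whose flags still carry both the Enabled and Hot Pluggable bits. The struct and field names, and the example node ID, are hypothetical names chosen for this sketch; the flag values mirror the MemoryAffinityFlags enum quoted earlier.

    /* Sketch only: layout per the ACPI spec; all fields little-endian. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    enum {
        MEM_AFFINITY_ENABLED      = 1u << 0,  /* flags bit 0: entry usable   */
        MEM_AFFINITY_HOTPLUGGABLE = 1u << 1,  /* flags bit 1: hot add allowed */
    };

    struct __attribute__((packed)) srat_mem_affinity {
        uint8_t  type;              /* 1 = Memory Affinity Structure */
        uint8_t  length;            /* always 40                     */
        uint32_t proximity_domain;  /* the NUMA node ID              */
        uint16_t reserved1;
        uint32_t base_lo, base_hi;  /* range base address            */
        uint32_t len_lo, len_hi;    /* range length (0: empty node)  */
        uint32_t reserved2;
        uint32_t flags;
        uint64_t reserved3;
    };
    _Static_assert(sizeof(struct srat_mem_affinity) == 40, "bad layout");

    int main(void)
    {
        /* What the patched loop would describe for "-numa node,nodeid=3". */
        struct srat_mem_affinity entry = {
            .type             = 1,
            .length           = 40,
            .proximity_domain = 3,
            .flags            = MEM_AFFINITY_ENABLED | MEM_AFFINITY_HOTPLUGGABLE,
        };

        printf("node %" PRIu32 ": size %" PRIu64 ", flags 0x%" PRIx32 "\n",
               entry.proximity_domain,
               ((uint64_t)entry.len_hi << 32) | entry.len_lo,
               entry.flags);
        return 0;
    }

Builds with gcc/clang (the packed attribute and _Static_assert assume those toolchains). Inside the guest, the real table can be dumped with acpidump and disassembled with iasl -d to confirm the empty nodes' entries.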