On Wed, 27 May 2020 23:48:06 -0600 Vishal Verma <vishal.l.ve...@intel.com> wrote:
> NVDIMMs can belong to their own proximity domains, as described by the > NFIT. In such cases, the SRAT needs to have Memory Affinity structures > in the SRAT for these NVDIMMs, otherwise Linux doesn't populate node > data structures properly during NUMA initialization. See the following > for an example failure case. > > https://lore.kernel.org/linux-nvdimm/20200416225438.15208-1-vishal.l.ve...@intel.com/ > > Fix this by adding device address range and node information from > NVDIMMs to the SRAT in build_srat(). > > The relevant command line options to exercise this are below. Nodes 0-1 > contain CPUs and regular memory, and nodes 2-3 are the NVDIMM address > space. > > -numa node,nodeid=0,mem=2048M, > -numa node,nodeid=1,mem=2048M, > -numa node,nodeid=2,mem=0, > -object > memory-backend-file,id=nvmem0,share,mem-path=nvdimm-0,size=16384M,align=128M > -device nvdimm,memdev=nvmem0,id=nv0,label-size=2M,node=2 > -numa node,nodeid=3,mem=0, > -object > memory-backend-file,id=nvmem1,share,mem-path=nvdimm-1,size=16384M,align=128M > -device nvdimm,memdev=nvmem1,id=nv1,label-size=2M,node=3 > > Cc: Jingqi Liu <jingqi....@intel.com> > Cc: Michael S. Tsirkin <m...@redhat.com> > Reviewed-by: Jingqi Liu <jingqi....@intel.com> > Signed-off-by: Vishal Verma <vishal.l.ve...@intel.com> > --- > hw/acpi/nvdimm.c | 26 ++++++++++++++++++++++++++ > hw/i386/acpi-build.c | 10 ++++++++++ > include/hw/mem/nvdimm.h | 1 + > 3 files changed, 37 insertions(+) > > diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c > index 9316d12b70..d322c6a7a7 100644 > --- a/hw/acpi/nvdimm.c > +++ b/hw/acpi/nvdimm.c > @@ -28,6 +28,7 @@ > > #include "qemu/osdep.h" > #include "qemu/uuid.h" > +#include "qapi/error.h" > #include "hw/acpi/acpi.h" > #include "hw/acpi/aml-build.h" > #include "hw/acpi/bios-linker-loader.h" > @@ -1334,6 +1335,31 @@ static void nvdimm_build_ssdt(GArray *table_offsets, > GArray *table_data, > free_aml_allocator(); > } > > +void *nvdimm_build_srat(GArray *table_data) > +{ > + AcpiSratMemoryAffinity *numamem = NULL; > + GSList *device_list = nvdimm_get_device_list(); > + > + for (; device_list; device_list = device_list->next) { > + DeviceState *dev = device_list->data; I'd use Object here with OBJECT() cast and drop casts beolw in property getters > + uint64_t addr, size; > + int node; > + > + node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, > + &error_abort); > + addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP, > + &error_abort); > + size = object_property_get_uint(OBJECT(dev), PC_DIMM_SIZE_PROP, > + &error_abort); > + > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, addr, size, node, > + MEM_AFFINITY_ENABLED | MEM_AFFINITY_NON_VOLATILE); > + } > + g_slist_free(device_list); > + return numamem; > +} > + > void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, > BIOSLinker *linker, NVDIMMState *state, > uint32_t ram_slots) > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 2e15f6848e..1461d8a718 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2428,6 +2428,16 @@ build_srat(GArray *table_data, BIOSLinker *linker, > MachineState *machine) > MEM_AFFINITY_ENABLED); > } > } > + > + if (machine->nvdimms_state->is_enabled) { > + void *ret; > + > + ret = nvdimm_build_srat(table_data); > + if (ret != NULL) { > + numamem = ret; > + } why do we need return value here and a test condition and assign 'ret' to numamem? > + } > + > slots = (table_data->len - numa_start) / sizeof *numamem; > for (; slots < pcms->numa_nodes + 2; slots++) { > numamem = acpi_data_push(table_data, sizeof *numamem); > diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h > index a3c08955e8..fbe56509b8 100644 > --- a/include/hw/mem/nvdimm.h > +++ b/include/hw/mem/nvdimm.h > @@ -155,6 +155,7 @@ typedef struct NVDIMMState NVDIMMState; > void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, > struct AcpiGenericAddress dsm_io, > FWCfgState *fw_cfg, Object *owner); > +void *nvdimm_build_srat(GArray *table_data); > void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, > BIOSLinker *linker, NVDIMMState *state, > uint32_t ram_slots);