On Fri, 14 Jun 2019 23:56:24 +0800 Tao Xu <tao3...@intel.com> wrote: > From: Liu Jingqi <jingqi....@intel.com> > > HMAT is defined in ACPI 6.2: 5.2.27 Heterogeneous Memory Attribute Table > (HMAT). > The specification references below link: > http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf > > It describes the memory attributes, such as memory side cache > attributes and bandwidth and latency details, related to the > System Physical Address (SPA) Memory Ranges. The software is > expected to use this information as hint for optimization. > > This structure describes the System Physical Address(SPA) range > occupied by memory subsystem and its associativity with processor > proximity domain as well as hint for memory usage. > > Signed-off-by: Liu Jingqi <jingqi....@intel.com> > Signed-off-by: Tao Xu <tao3...@intel.com> > --- > > Changes in v5 -> v4: > - Add more descriptions from ACPI spec (Igor) > - Remove all the dependcy on PCMachineState (Igor) > --- > hw/acpi/Kconfig | 5 ++ > hw/acpi/Makefile.objs | 1 + > hw/acpi/hmat.c | 153 ++++++++++++++++++++++++++++++++++++++++++ > hw/acpi/hmat.h | 43 ++++++++++++ > hw/core/machine.c | 2 + > hw/i386/acpi-build.c | 3 + > include/sysemu/numa.h | 2 + > numa.c | 6 ++ > 8 files changed, 215 insertions(+) > create mode 100644 hw/acpi/hmat.c > create mode 100644 hw/acpi/hmat.h > > diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig > index 7c59cf900b..039bb99efa 100644 > --- a/hw/acpi/Kconfig > +++ b/hw/acpi/Kconfig > @@ -7,6 +7,7 @@ config ACPI_X86 > select ACPI_NVDIMM > select ACPI_CPU_HOTPLUG > select ACPI_MEMORY_HOTPLUG > + select ACPI_HMAT > > config ACPI_X86_ICH > bool > @@ -31,3 +32,7 @@ config ACPI_VMGENID > bool > default y > depends on PC > + > +config ACPI_HMAT > + bool > + depends on ACPI > diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs > index 661a9b8c2f..20cc2fb124 100644 > --- a/hw/acpi/Makefile.objs > +++ b/hw/acpi/Makefile.objs > @@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o > common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o > common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o > common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o > +common-obj-$(CONFIG_ACPI_HMAT) += hmat.o > common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o > > common-obj-y += acpi_interface.o > diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c > new file mode 100644 > index 0000000000..6fd434c4d9 > --- /dev/null > +++ b/hw/acpi/hmat.c > @@ -0,0 +1,153 @@ > +/* > + * HMAT ACPI Implementation > + * > + * Copyright(C) 2019 Intel Corporation. > + * > + * Author: > + * Liu jingqi <jingqi....@linux.intel.com> > + * Tao Xu <tao3...@intel.com> > + * > + * HMAT is defined in ACPI 6.2: 5.2.27 Heterogeneous Memory Attribute Table > + * (HMAT) > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see > <http://www.gnu.org/licenses/> > + */ > + > +#include "qemu/osdep.h" > +#include "sysemu/numa.h" > +#include "hw/acpi/hmat.h" > +#include "hw/mem/pc-dimm.h" > + > +/* ACPI 6.2: 5.2.27.3 Memory Subsystem Address Range Structure: Table 5-141 > */ > +static void build_hmat_spa(GArray *table_data, uint16_t flags, > + uint64_t base, uint64_t length, int node) > +{ > + > + /* Memory Subsystem Address Range Structure */ > + /* Type */ > + build_append_int_noprefix(table_data, 0, 2); > + /* Reserved */ > + build_append_int_noprefix(table_data, 0, 2); > + /* Length */ > + build_append_int_noprefix(table_data, 40, 4); > + /* Flags */ > + build_append_int_noprefix(table_data, flags, 2); > + /* Reserved */ > + build_append_int_noprefix(table_data, 0, 2); > + /* Process Proximity Domain */ > + build_append_int_noprefix(table_data, node, 4); > + /* Memory Proximity Domain */ > + build_append_int_noprefix(table_data, node, 4); > + /* Reserved */ > + build_append_int_noprefix(table_data, 0, 4); > + /* System Physical Address Range Base */ > + build_append_int_noprefix(table_data, base, 8); > + /* System Physical Address Range Length */ > + build_append_int_noprefix(table_data, length, 8); > +} > + > +static int pc_dimm_device_list(Object *obj, void *opaque) > +{ > + GSList **list = opaque; > + > + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { > + DeviceState *dev = DEVICE(obj); > + if (dev->realized) { /* only realized memory devices matter */ > + *list = g_slist_append(*list, DEVICE(obj)); > + } > + } > + > + object_child_foreach(obj, pc_dimm_device_list, opaque); > + return 0; > +} > + > +/* Build HMAT sub table structures */ > +static void hmat_build_table_structs(GArray *table_data, MachineState *ms) > +{ > + GSList *device_list = NULL; > + uint16_t flags; > + uint64_t mem_base, mem_len; > + int i; > + NumaState *nstat = ms->numa_state; > + NumaMemRange *mem_range; > + > + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); > + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); > + AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj); > + > + /* > + * ACPI 6.2: 5.2.27.3 Memory Subsystem Address Range Structure: > + * Table 5-141. The Proximity Domain of System Physical Address > + * ranges defined in the HMAT, NFIT and SRAT tables shall match > + * each other. > + */ > + if (nstat->num_nodes && !nstat->mem_ranges_num) { > + nstat->mem_ranges = g_array_new(false, true /* clear */, > + sizeof *mem_range); > + adevc->build_mem_ranges(adev, ms); another place you are tying to initialize nstat->mem_ranges make initialization in generic numa init code
> + } > + > + for (i = 0; i < nstat->mem_ranges_num; i++) { > + mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i); > + flags = 0; > + > + if (nstat->nodes[mem_range->node].is_initiator) { > + flags |= HMAT_SPA_PROC_VALID; > + } > + if (nstat->nodes[mem_range->node].is_target) { > + flags |= HMAT_SPA_MEM_VALID; > + } > + > + build_hmat_spa(table_data, flags, mem_range->base, > + mem_range->length, > + mem_range->node); > + } > + > + /* Build HMAT SPA structures for PC-DIMM devices. */ > + object_child_foreach(OBJECT(ms), pc_dimm_device_list, &device_list); > + > + for (; device_list; device_list = device_list->next) { > + PCDIMMDevice *dimm = device_list->data; > + mem_base = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, > + NULL); > + mem_len = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, > + NULL); > + i = object_property_get_uint(OBJECT(dimm), PC_DIMM_NODE_PROP, NULL); > + flags = 0; > + > + if (nstat->nodes[i].is_initiator) { > + flags |= HMAT_SPA_PROC_VALID; > + } > + if (nstat->nodes[i].is_target) { > + flags |= HMAT_SPA_MEM_VALID; > + } > + build_hmat_spa(table_data, flags, mem_base, mem_len, i); > + } Don't you need to free device_list at this point? > +} > + > +void build_hmat(GArray *table_data, BIOSLinker *linker, MachineState *ms) > +{ > + uint64_t hmat_start; > + > + hmat_start = table_data->len; > + > + /* reserve space for HMAT header */ > + acpi_data_push(table_data, 40); > + > + hmat_build_table_structs(table_data, ms); > + > + build_header(linker, table_data, > + (void *)(table_data->data + hmat_start), > + "HMAT", table_data->len - hmat_start, 1, NULL, NULL); > +} > diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h > new file mode 100644 > index 0000000000..e24b673fad > --- /dev/null > +++ b/hw/acpi/hmat.h > @@ -0,0 +1,43 @@ > +/* > + * HMAT ACPI Implementation Header > + * > + * Copyright(C) 2019 Intel Corporation. > + * > + * Author: > + * Liu jingqi <jingqi....@linux.intel.com> > + * Tao Xu <tao3...@intel.com> > + * > + * HMAT is defined in ACPI 6.2. > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see > <http://www.gnu.org/licenses/> > + */ > + > +#ifndef HMAT_H > +#define HMAT_H > + > +#include "hw/acpi/acpi-defs.h" > +#include "hw/acpi/acpi.h" > +#include "hw/acpi/bios-linker-loader.h" > +#include "hw/acpi/aml-build.h" > + > +/* the values of AcpiHmatSpaRange flag */ > +enum { > + HMAT_SPA_PROC_VALID = 0x1, > + HMAT_SPA_MEM_VALID = 0x2, > + HMAT_SPA_RESERVATION_HINT = 0x4, > +}; > + > +void build_hmat(GArray *table_data, BIOSLinker *linker, MachineState *ms); > + > +#endif > diff --git a/hw/core/machine.c b/hw/core/machine.c > index 14b29de0a9..2ad09ec23e 100644 > --- a/hw/core/machine.c > +++ b/hw/core/machine.c > @@ -646,6 +646,7 @@ void machine_set_cpu_numa_node(MachineState *machine, > const CpuInstanceProperties *props, Error > **errp) > { > MachineClass *mc = MACHINE_GET_CLASS(machine); > + NodeInfo *numa_info = machine->numa_state->nodes; > bool match = false; > int i; > > @@ -706,6 +707,7 @@ void machine_set_cpu_numa_node(MachineState *machine, > match = true; > slot->props.node_id = props->node_id; > slot->props.has_node_id = props->has_node_id; > + numa_info[props->node_id].is_initiator = true; > } > > if (!match) { > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 44dd447fa5..6584eac76e 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -66,6 +66,7 @@ > #include "hw/i386/intel_iommu.h" > > #include "hw/acpi/ipmi.h" > +#include "hw/acpi/hmat.h" > > /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and > * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows > @@ -2710,6 +2711,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState > *machine) > acpi_add_table(table_offsets, tables_blob); > build_slit(tables_blob, tables->linker, machine); > } > + acpi_add_table(table_offsets, tables_blob); > + build_hmat(tables_blob, tables->linker, machine); I'm not sure if we should add it unconditionally. Is this table used in any meaningful manner by guest when it's incomplete (i.e. populated only with SPA records)? > } > if (acpi_get_mcfg(&mcfg)) { > acpi_add_table(table_offsets, tables_blob); > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > index e3c85b77bc..13cff59112 100644 > --- a/include/sysemu/numa.h > +++ b/include/sysemu/numa.h > @@ -10,6 +10,8 @@ struct NodeInfo { > uint64_t node_mem; > struct HostMemoryBackend *node_memdev; > bool present; > + bool is_initiator; > + bool is_target; > uint8_t distance[MAX_NODES]; > }; > > diff --git a/numa.c b/numa.c > index d23e130bce..5556d118c3 100644 > --- a/numa.c > +++ b/numa.c > @@ -102,6 +102,10 @@ static void parse_numa_node(MachineState *ms, > NumaNodeOptions *node, > } > } > > + if (node->cpus) { > + numa_info[nodenr].is_initiator = true; > + } > + > if (node->has_mem && node->has_memdev) { > error_setg(errp, "cannot specify both mem= and memdev="); > return; > @@ -118,6 +122,7 @@ static void parse_numa_node(MachineState *ms, > NumaNodeOptions *node, > > if (node->has_mem) { > numa_info[nodenr].node_mem = node->mem; > + numa_info[nodenr].is_target = true; > } > if (node->has_memdev) { > Object *o; > @@ -130,6 +135,7 @@ static void parse_numa_node(MachineState *ms, > NumaNodeOptions *node, > object_ref(o); > numa_info[nodenr].node_mem = object_property_get_uint(o, "size", > NULL); > numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); > + numa_info[nodenr].is_target = true; > } > numa_info[nodenr].present = true; > max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);