> -----Original Message----- > From: Xu, Tao3 > Sent: Thursday, September 12, 2019 1:37 PM > To: imamm...@redhat.com; ebl...@redhat.com; ehabk...@redhat.com > Cc: Xu, Tao3 <tao3...@intel.com>; Liu, Jingqi <jingqi....@intel.com>; > Williams, Dan J <dan.j.willi...@intel.com>; > jonathan.came...@huawei.com; Du, Fan <fan...@intel.com>; qemu-devel@nongnu.org > Subject: [PATCH v11 05/11] numa: Extend CLI to provide initiator information > for numa nodes > > In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT), The > initiator represents processor which access to memory. And > in 5.2.27.3 Memory Proximity Domain Attributes Structure, the attached > initiator is defined as where the memory controller responsible for > a memory proximity domain. With attached initiator information, the topology > of heterogeneous memory can be described. > > Extend CLI of "-numa node" option to indicate the initiator numa node-id. > In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report > the platform's HMAT tables. > Thanks. Reviewed-by: Jingqi Liu <jingqi....@intel.com>
Jingqi > Suggested-by: Dan Williams <dan.j.willi...@intel.com> > Signed-off-by: Tao Xu <tao3...@intel.com> > --- > > No changes in v11. > > Changes in v10: > - Add machine oprion properties "-machine hmat=on|off" for > enabling or disabling HMAT in QEMU. > - Add more description for initiator option. > - Report error then HMAT is enalbe and initiator option is > missing. Not allow invaild initiator now. (Igor) > --- > hw/core/machine.c | 72 +++++++++++++++++++++++++++++++++++++++++++ > hw/core/numa.c | 11 +++++++ > include/sysemu/numa.h | 6 ++++ > qapi/machine.json | 10 +++++- > qemu-options.hx | 35 ++++++++++++++++++--- > 5 files changed, 128 insertions(+), 6 deletions(-) > > diff --git a/hw/core/machine.c b/hw/core/machine.c index > 1689ad3bf8..b42f574282 100644 > --- a/hw/core/machine.c > +++ b/hw/core/machine.c > @@ -518,6 +518,20 @@ static void machine_set_nvdimm(Object *obj, bool value, > Error **errp) > ms->nvdimms_state->is_enabled = value; } > > +static bool machine_get_hmat(Object *obj, Error **errp) { > + MachineState *ms = MACHINE(obj); > + > + return ms->numa_state->hmat_enabled; } > + > +static void machine_set_hmat(Object *obj, bool value, Error **errp) { > + MachineState *ms = MACHINE(obj); > + > + ms->numa_state->hmat_enabled = value; } > + > static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) { > MachineState *ms = MACHINE(obj); > @@ -645,6 +659,7 @@ void machine_set_cpu_numa_node(MachineState *machine, > const CpuInstanceProperties *props, Error > **errp) { > MachineClass *mc = MACHINE_GET_CLASS(machine); > + NodeInfo *numa_info = machine->numa_state->nodes; > bool match = false; > int i; > > @@ -714,6 +729,16 @@ void machine_set_cpu_numa_node(MachineState *machine, > match = true; > slot->props.node_id = props->node_id; > slot->props.has_node_id = props->has_node_id; > + > + if (numa_info[props->node_id].initiator_valid && > + (props->node_id != numa_info[props->node_id].initiator)) { > + error_setg(errp, "The initiator of CPU NUMA node %" PRId64 > + " should be itself.", props->node_id); > + return; > + } > + numa_info[props->node_id].initiator_valid = true; > + numa_info[props->node_id].has_cpu = true; > + numa_info[props->node_id].initiator = props->node_id; > } > > if (!match) { > @@ -960,6 +985,13 @@ static void machine_initfn(Object *obj) > > if (mc->numa_mem_supported) { > ms->numa_state = g_new0(NumaState, 1); > + object_property_add_bool(obj, "hmat", > + machine_get_hmat, machine_set_hmat, > + &error_abort); > + object_property_set_description(obj, "hmat", > + "Set on/off to enable/disable " > + "ACPI Heterogeneous Memory Attribute > " > + "Table (HMAT)", NULL); > } > > /* Register notifier when init is done for sysbus sanity checks */ @@ > -1048,6 +1080,41 @@ static char *cpu_slot_to_string(const > CPUArchId *cpu) > return g_string_free(s, false); > } > > +static void numa_validate_initiator(NumaState *nstat) { > + int i; > + NodeInfo *numa_info = nstat->nodes; > + > + for (i = 0; i < nstat->num_nodes; i++) { > + if (numa_info[i].initiator >= MAX_NODES) { > + error_report("The initiator id %" PRIu16 " expects an integer " > + "between 0 and %d", numa_info[i].initiator, > + MAX_NODES - 1); > + goto err; > + } > + > + if (!numa_info[numa_info[i].initiator].present) { > + error_report("NUMA node %" PRIu16 " is missing, use " > + "'-numa node' option to declare it first.", > + numa_info[i].initiator); > + goto err; > + } > + > + if (numa_info[numa_info[i].initiator].has_cpu) { > + numa_info[i].initiator_valid = true; > + } else { > + error_report("The initiator of NUMA node %d is invalid.", i); > + goto err; > + } > + } > + > + return; > + > +err: > + error_printf("\n"); > + exit(1); > +} > + > static void machine_numa_finish_cpu_init(MachineState *machine) { > int i; > @@ -1088,6 +1155,11 @@ static void machine_numa_finish_cpu_init(MachineState > *machine) > machine_set_cpu_numa_node(machine, &props, &error_fatal); > } > } > + > + if (machine->numa_state->hmat_enabled) { > + numa_validate_initiator(machine->numa_state); > + } > + > if (s->len && !qtest_enabled()) { > warn_report("CPU(s) not present in any NUMA nodes: %s", > s->str); > diff --git a/hw/core/numa.c b/hw/core/numa.c index 4dfec5c95b..bdce7d4217 > 100644 > --- a/hw/core/numa.c > +++ b/hw/core/numa.c > @@ -133,6 +133,17 @@ static void parse_numa_node(MachineState *ms, > NumaNodeOptions *node, > numa_info[nodenr].node_mem = object_property_get_uint(o, "size", > NULL); > numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); > } > + > + if (node->has_initiator) { > + if (!ms->numa_state->hmat_enabled) { > + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " > + "(HMAT) is disabled, use -machine hmat=on before " > + "set initiator of NUMA"); > + return; > + } > + > + numa_info[nodenr].initiator = node->initiator; > + } > numa_info[nodenr].present = true; > max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); > ms->numa_state->num_nodes++; > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index > ae9c41d02b..a788c3b126 100644 > --- a/include/sysemu/numa.h > +++ b/include/sysemu/numa.h > @@ -18,6 +18,9 @@ struct NodeInfo { > uint64_t node_mem; > struct HostMemoryBackend *node_memdev; > bool present; > + bool has_cpu; > + bool initiator_valid; > + uint16_t initiator; > uint8_t distance[MAX_NODES]; > }; > > @@ -33,6 +36,9 @@ struct NumaState { > /* Allow setting NUMA distance for different NUMA nodes */ > bool have_numa_distance; > > + /* Detect if HMAT support is enabled. */ > + bool hmat_enabled; > + > /* NUMA nodes information */ > NodeInfo nodes[MAX_NODES]; > }; > diff --git a/qapi/machine.json b/qapi/machine.json index > ca26779f1a..3c2914cd1c 100644 > --- a/qapi/machine.json > +++ b/qapi/machine.json > @@ -463,6 +463,13 @@ > # @memdev: memory backend object. If specified for one node, > # it must be specified for all nodes. > # > +# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145, > +# indicate the nodeid which has the memory controller > +# responsible for this NUMA node. This field provides > +# additional information as to the initiator node that > +# is closest (as in directly attached) to this node, and > +# therefore has the best performance (since 4.2) > +# > # Since: 2.1 > ## > { 'struct': 'NumaNodeOptions', > @@ -470,7 +477,8 @@ > '*nodeid': 'uint16', > '*cpus': ['uint16'], > '*mem': 'size', > - '*memdev': 'str' }} > + '*memdev': 'str', > + '*initiator': 'uint16' }} > > ## > # @NumaDistOptions: > diff --git a/qemu-options.hx b/qemu-options.hx index bbfd936d29..74ccc4d782 > 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -43,7 +43,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ > " suppress-vmdesc=on|off disables self-describing > migration (default=off)\n" > " nvdimm=on|off controls NVDIMM support (default=off)\n" > " enforce-config-section=on|off enforce configuration > section migration (default=off)\n" > - " memory-encryption=@var{} memory encryption object to > use (default=none)\n", > + " memory-encryption=@var{} memory encryption object to > use (default=none)\n" > + " hmat=on|off controls ACPI HMAT support (default=off)\n", > QEMU_ARCH_ALL) > STEXI > @item -machine [type=]@var{name}[,prop=@var{value}[,...]] > @@ -103,6 +104,9 @@ NOTE: this parameter is deprecated. Please use > @option{-global} @option{migration.send- > configuration}=@var{on|off} instead. > @item memory-encryption=@var{} > Memory encryption object to use. The default is none. > +@item hmat=on|off > +Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support. > +The default is off. > @end table > ETEXI > > @@ -161,14 +165,14 @@ If any on the three values is given, the total number > of CPUs @var{n} can be omi ETEXI > > DEF("numa", HAS_ARG, QEMU_OPTION_numa, > - "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" > - "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" > + "-numa > node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" > + "-numa > node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" > "-numa dist,src=source,dst=destination,val=distance\n" > "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n", > QEMU_ARCH_ALL) > STEXI > -@item -numa > node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] > -@itemx -numa > node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] > +@item -numa > +node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@va > +r{node}][,initiator=@var{initiator}] > +@itemx -numa > +node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@v > +ar{node}][,initiator=@var{initiator}] > @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} > @itemx -numa > cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] > @findex -numa > @@ -215,6 +219,27 @@ split equally between them. > @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one > node uses @samp{memdev}, all of them have to use it. > > +@samp{initiator} is an additional option indicate the @var{initiator} > +NUMA that has best performance (the lowest latency or largest > +bandwidth) to this NUMA @var{node}. Note that this option can be set > +only when the machine oprion properties "-machine hmat=on". > + > +Following example creates a machine with 2 NUMA nodes, node 0 has CPU. > +node 1 has only memory, and its' initiator is node 0. Note that because > +node 0 has CPU, by default the initiator of node 0 is itself and must > +be itself. > +@example > +-machine hmat=on \ > +-m 2G,slots=2,maxmem=4G \ > +-object memory-backend-ram,size=1G,id=m0 \ -object > +memory-backend-ram,size=1G,id=m1 \ -numa node,nodeid=0,memdev=m0 \ > +-numa node,nodeid=1,memdev=m1,initiator=0 \ -smp 2,sockets=2,maxcpus=2 > +\ -numa cpu,node-id=0,socket-id=0 \ -numa cpu,node-id=0,socket-id=1 > +@end example > + > @var{source} and @var{destination} are NUMA node IDs. > @var{distance} is the NUMA distance from @var{source} to @var{destination}. > The distance from a node to itself is always 10. If any pair of nodes is > -- > 2.20.1