On Fri, Aug 30, 2013 at 11:10:41AM +0800, Wanlong Gao wrote: > Change -numa option like following as Paolo suggested: > -numa node,nodeid=0,cpus=0-1 \ > -numa mem,nodeid=0,size=1G > > This new option will make later coming memory hotplug better. > And this new option is implemented using OptsVisitor. > > And just remain "-numa node,mem=xx" as legacy. > > Reviewed-by: Laszlo Ersek <ler...@redhat.com> > Signed-off-by: Wanlong Gao <gaowanl...@cn.fujitsu.com>
Would it be possible to first move the existing code as-is to numa.c, then introduce qemu_numa_opts, and then introduce "-numa mem"? It would make the patch much easier to review. > --- > Makefile.target | 2 +- > include/sysemu/sysemu.h | 3 + > numa.c | 144 > ++++++++++++++++++++++++++++++++++++++++++++++++ > qemu-options.hx | 6 +- > vl.c | 113 ++++++------------------------------- > 5 files changed, 168 insertions(+), 100 deletions(-) > create mode 100644 numa.c > > diff --git a/Makefile.target b/Makefile.target > index 9a49852..7e1fddf 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -113,7 +113,7 @@ endif #CONFIG_BSD_USER > ######################################################### > # System emulator target > ifdef CONFIG_SOFTMMU > -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o > +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o > obj-y += qtest.o > obj-y += hw/ > obj-$(CONFIG_FDT) += device_tree.o > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index b1aa059..489b4b6 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -129,8 +129,11 @@ extern QEMUClockType rtc_clock; > #define MAX_NODES 64 > #define MAX_CPUMASK_BITS 255 > extern int nb_numa_nodes; > +extern int nb_numa_mem_nodes; > extern uint64_t node_mem[MAX_NODES]; > extern unsigned long *node_cpumask[MAX_NODES]; > +extern QemuOptsList qemu_numa_opts; > +int numa_init_func(QemuOpts *opts, void *opaque); > > #define MAX_OPTION_ROMS 16 > typedef struct QEMUOptionRom { > diff --git a/numa.c b/numa.c > new file mode 100644 > index 0000000..e6924f4 > --- /dev/null > +++ b/numa.c > @@ -0,0 +1,144 @@ > +/* > + * QEMU System Emulator > + * > + * Copyright (c) 2013 Fujitsu Ltd. > + * Author: Wanlong Gao <gaowanl...@cn.fujitsu.com> > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "sysemu/sysemu.h" > +#include "qemu/bitmap.h" > +#include "qapi-visit.h" > +#include "qapi/opts-visitor.h" > +#include "qapi/dealloc-visitor.h" > + > +QemuOptsList qemu_numa_opts = { > + .name = "numa", > + .implied_opt_name = "type", > + .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head), > + .desc = { { 0 } } /* validated with OptsVisitor */ > +}; > + > +static int numa_node_parse(NumaNodeOptions *opts) > +{ > + uint16_t nodenr; > + uint16List *cpus = NULL; > + > + if (opts->has_nodeid) { > + nodenr = opts->nodeid; > + if (nodenr >= MAX_NODES) { > + fprintf(stderr, "qemu: Max number of NUMA nodes reached: %" > + PRIu16 "\n", nodenr); > + return -1; > + } > + } else { > + nodenr = nb_numa_nodes; > + } If you make the (nodenr >= MAX_NODES) check unconditional, you simplify the code and you won't need the nb_numa_nodes check at numa_init_func(). > + > + for (cpus = opts->cpus; cpus; cpus = cpus->next) { > + bitmap_set(node_cpumask[nodenr], cpus->value, 1); > + } What if cpus->value > MAXCPUMASK_BITS? > + > + if (opts->has_mem) { > + int64_t mem_size; > + char *endptr; > + mem_size = strtosz(opts->mem, &endptr); > + if (mem_size < 0 || *endptr) { > + fprintf(stderr, "qemu: invalid numa mem size: %s\n", opts->mem); > + return -1; > + } > + node_mem[nodenr] = mem_size; > + } > + > + return 0; > +} > + > +static int numa_mem_parse(NumaMemOptions *opts) > +{ > + uint16_t nodenr; > + uint64_t mem_size; > + > + if (opts->has_nodeid) { > + nodenr = opts->nodeid; > + if (nodenr >= MAX_NODES) { > + fprintf(stderr, "qemu: Max number of NUMA nodes reached: %" > + PRIu16 "\n", nodenr); > + return -1; > + } > + } else { > + nodenr = nb_numa_mem_nodes; What if nb_numa_mem_nodes > MAX_NODES? > + } > + > + if (opts->has_size) { > + mem_size = opts->size; > + node_mem[nodenr] = mem_size; > + } > + > + return 0; > +} > + > +int numa_init_func(QemuOpts *opts, void *opaque) > +{ > + NumaOptions *object = NULL; > + Error *err = NULL; > + int ret = 0; > + > + { > + OptsVisitor *ov = opts_visitor_new(opts); > + visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err); > + opts_visitor_cleanup(ov); > + } > + > + if (error_is_set(&err)) { > + fprintf(stderr, "qemu: %s\n", error_get_pretty(err)); > + error_free(err); > + ret = -1; > + goto error; > + } > + > + switch (object->kind) { > + case NUMA_OPTIONS_KIND_NODE: > + if (nb_numa_nodes >= MAX_NODES) { > + fprintf(stderr, "qemu: too many NUMA nodes\n"); > + ret = -1; > + goto error; > + } > + ret = numa_node_parse(object->node); > + nb_numa_nodes++; > + break; > + case NUMA_OPTIONS_KIND_MEM: > + ret = numa_mem_parse(object->mem); > + nb_numa_mem_nodes++; > + break; > + default: > + fprintf(stderr, "qemu: Invalid NUMA options type.\n"); > + ret = -1; > + } > + > +error: > + if (object) { > + QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); > + visit_type_NumaOptions(qapi_dealloc_get_visitor(dv), > + &object, NULL, NULL); > + qapi_dealloc_visitor_cleanup(dv); > + } > + > + return ret; > +} > diff --git a/qemu-options.hx b/qemu-options.hx > index d15338e..e9123b8 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -95,11 +95,13 @@ specifies the maximum number of hotpluggable CPUs. > ETEXI > > DEF("numa", HAS_ARG, QEMU_OPTION_numa, > - "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) > + "-numa node[,nodeid=node][,cpus=cpu[-cpu]]\n" > + "-numa mem[,nodeid=node][,size=size]\n" > + , QEMU_ARCH_ALL) > STEXI > @item -numa @var{opts} > @findex -numa > -Simulate a multi node NUMA system. If mem and cpus are omitted, resources > +Simulate a multi node NUMA system. If @var{size} and @var{cpus} are omitted, > resources > are split equally. > ETEXI > > diff --git a/vl.c b/vl.c > index dfbc071..0ef5c5a 100644 > --- a/vl.c > +++ b/vl.c > @@ -250,6 +250,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = > QTAILQ_HEAD_INITIALIZER(fw_boot_order); > > int nb_numa_nodes; > +int nb_numa_mem_nodes; > uint64_t node_mem[MAX_NODES]; > unsigned long *node_cpumask[MAX_NODES]; > > @@ -1330,102 +1331,6 @@ char *get_boot_devices_list(size_t *size) > return list; > } > > -static void numa_node_parse_cpus(int nodenr, const char *cpus) > -{ > - char *endptr; > - unsigned long long value, endvalue; > - > - /* Empty CPU range strings will be considered valid, they will simply > - * not set any bit in the CPU bitmap. > - */ > - if (!*cpus) { > - return; > - } > - > - if (parse_uint(cpus, &value, &endptr, 10) < 0) { > - goto error; > - } > - if (*endptr == '-') { > - if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { > - goto error; > - } > - } else if (*endptr == '\0') { > - endvalue = value; > - } else { > - goto error; > - } > - > - if (endvalue >= MAX_CPUMASK_BITS) { > - endvalue = MAX_CPUMASK_BITS - 1; > - fprintf(stderr, > - "qemu: NUMA: A max of %d VCPUs are supported\n", > - MAX_CPUMASK_BITS); > - } > - > - if (endvalue < value) { > - goto error; > - } > - > - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); > - return; > - > -error: > - fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus); > - exit(1); > -} > - > -static void numa_add(const char *optarg) > -{ > - char option[128]; > - char *endptr; > - unsigned long long nodenr; > - > - optarg = get_opt_name(option, 128, optarg, ','); > - if (*optarg == ',') { > - optarg++; > - } > - if (!strcmp(option, "node")) { > - > - if (nb_numa_nodes >= MAX_NODES) { > - fprintf(stderr, "qemu: too many NUMA nodes\n"); > - exit(1); > - } > - > - if (get_param_value(option, 128, "nodeid", optarg) == 0) { > - nodenr = nb_numa_nodes; > - } else { > - if (parse_uint_full(option, &nodenr, 10) < 0) { > - fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option); > - exit(1); > - } > - } > - > - if (nodenr >= MAX_NODES) { > - fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr); > - exit(1); > - } > - > - if (get_param_value(option, 128, "mem", optarg) == 0) { > - node_mem[nodenr] = 0; > - } else { > - int64_t sval; > - sval = strtosz(option, &endptr); > - if (sval < 0 || *endptr) { > - fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); > - exit(1); > - } > - node_mem[nodenr] = sval; > - } > - if (get_param_value(option, 128, "cpus", optarg) != 0) { > - numa_node_parse_cpus(nodenr, option); > - } > - nb_numa_nodes++; > - } else { > - fprintf(stderr, "Invalid -numa option: %s\n", option); > - exit(1); > - } > -} > - > static QemuOptsList qemu_smp_opts = { > .name = "smp-opts", > .implied_opt_name = "cpus", > @@ -2961,6 +2866,7 @@ int main(int argc, char **argv, char **envp) > qemu_add_opts(&qemu_tpmdev_opts); > qemu_add_opts(&qemu_realtime_opts); > qemu_add_opts(&qemu_msg_opts); > + qemu_add_opts(&qemu_numa_opts); > > runstate_init(); > > @@ -2986,6 +2892,7 @@ int main(int argc, char **argv, char **envp) > } > > nb_numa_nodes = 0; > + nb_numa_mem_nodes = 0; > nb_nics = 0; > > bdrv_init_with_whitelist(); > @@ -3147,7 +3054,10 @@ int main(int argc, char **argv, char **envp) > } > break; > case QEMU_OPTION_numa: > - numa_add(optarg); > + opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1); > + if (!opts) { > + exit(1); > + } > break; > case QEMU_OPTION_display: > display_type = select_display(optarg); > @@ -4228,6 +4138,15 @@ int main(int argc, char **argv, char **envp) > > register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); > > + if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func, > + NULL, 1) != 0) { > + exit(1); > + } > + > + if (nb_numa_mem_nodes > nb_numa_nodes) { > + nb_numa_nodes = nb_numa_mem_nodes; > + } > + > if (nb_numa_nodes > 0) { > int i; > > -- > 1.8.4 > > -- Eduardo