[Qemu-devel] [PATCH V17 01/11] NUMA: move numa related code to new file numa.c
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- Makefile.target | 2 +- cpus.c | 14 include/sysemu/cpus.h | 1 - include/sysemu/sysemu.h | 3 + numa.c | 182 vl.c| 139 +--- 6 files changed, 187 insertions(+), 154 deletions(-) create mode 100644 numa.c diff --git a/Makefile.target b/Makefile.target index af6ac7e..0197c17 100644 --- a/Makefile.target +++ b/Makefile.target @@ -109,7 +109,7 @@ endif #CONFIG_BSD_USER # # System emulator target ifdef CONFIG_SOFTMMU -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o obj-y += hw/ obj-$(CONFIG_FDT) += device_tree.o diff --git a/cpus.c b/cpus.c index 01d128d..53360b0 100644 --- a/cpus.c +++ b/cpus.c @@ -1297,20 +1297,6 @@ static void tcg_exec_all(void) exit_request = 0; } -void set_numa_modes(void) -{ -CPUState *cpu; -int i; - -CPU_FOREACH(cpu) { -for (i = 0; i nb_numa_nodes; i++) { -if (test_bit(cpu-cpu_index, node_cpumask[i])) { -cpu-numa_node = i; -} -} -} -} - void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) { /* XXX: implement xxx_cpu_list for targets that still miss it */ diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 6502488..4f79081 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -23,7 +23,6 @@ extern int smp_threads; #define smp_threads 1 #endif -void set_numa_modes(void); void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); #endif diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 495dae8..2509649 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -136,6 +136,9 @@ extern QEMUClockType rtc_clock; extern int nb_numa_nodes; extern uint64_t node_mem[MAX_NODES]; extern unsigned long *node_cpumask[MAX_NODES]; +void numa_add(const char *optarg); +void set_numa_nodes(void); +void set_numa_modes(void); #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/numa.c b/numa.c new file mode 100644 index 000..ce7736a --- /dev/null +++ b/numa.c @@ -0,0 +1,182 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2013 Fujitsu Ltd. + * Author: Wanlong Gao gaowanl...@cn.fujitsu.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include sysemu/sysemu.h + +static void numa_node_parse_cpus(int nodenr, const char *cpus) +{ +char *endptr; +unsigned long long value, endvalue; + +/* Empty CPU range strings will be considered valid, they will simply + * not set any bit in the CPU bitmap. + */ +if (!*cpus) { +return; +} + +if (parse_uint(cpus, value, endptr, 10) 0) { +goto error; +} +if (*endptr == '-') { +if (parse_uint_full(endptr + 1, endvalue, 10) 0) { +goto error; +} +} else if (*endptr == '\0') { +endvalue = value; +} else { +goto error; +} + +if (endvalue = MAX_CPUMASK_BITS) { +endvalue = MAX_CPUMASK_BITS - 1; +fprintf(stderr, +qemu: NUMA: A max of %d VCPUs are supported\n, + MAX_CPUMASK_BITS); +} + +if (endvalue value) { +goto error; +} + +bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); +return; + +error: +fprintf(stderr, qemu: Invalid NUMA CPU range: %s\n, cpus); +exit(1); +} + +void numa_add(const char *optarg) +{ +char option[128]; +char *endptr; +unsigned long long nodenr; + +optarg = get_opt_name(option, 128, optarg, ','); +if (*optarg == ',') { +optarg++; +} +if (!strcmp(option, node)) { + +if (nb_numa_nodes =
[Qemu-devel] [PATCH V17 00/11] Add support for binding guest numa nodes to host numa nodes
As you know, QEMU can't direct it's memory allocation now, this may cause guest cross node access performance regression. And, the worse thing is that if PCI-passthrough is used, direct-attached-device uses DMA transfer between device and qemu process. All pages of the guest will be pinned by get_user_pages(). KVM_ASSIGN_PCI_DEVICE ioctl kvm_vm_ioctl_assign_device() =kvm_assign_device() = kvm_iommu_map_memslots() = kvm_iommu_map_pages() = kvm_pin_pages() So, with direct-attached-device, all guest page's page count will be +1 and any page migration will not work. AutoNUMA won't too. So, we should set the guest nodes memory allocation policy before the pages are really mapped. According to this patch set, we are able to set guest nodes memory policy like following: -numa node,nodeid=0,cpus=0, \ -numa mem,size=1024M,policy=membind,host-nodes=0-1 \ -numa node,nodeid=1,cpus=1 \ -numa mem,size=1024M,policy=interleave,host-nodes=1 This supports policy={default|membind|interleave|preferred},relative=true,host-nodes=N-N like format. And add a QMP command query-numa to show numa info through this API. And convert the info numa monitor command to use this QMP command query-numa. This version removes set-mem-policy qmp and hmp commands temporarily as Marcelo and Paolo suggested. The simple test is like following: = Before: # numactl -H /qemu/x86_64-softmmu/qemu-system-x86_64 -m 4096 -smp 2 -numa node,nodeid=0,cpus=0,mem=2048 -numa node,nodeid=1,cpus=1,mem=2048 -hda 6u4ga2.qcow2 -enable-kvm -device pci-assign,host=07:00.1,id=hostdev0,bus=pci.0,addr=0x7 sleep 40 numactl -H [1] 13320 available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 5111 MB node 0 free: 4653 MB node 1 cpus: 1 3 node 1 size: 5120 MB node 1 free: 4764 MB node distances: node 0 1 0: 10 20 1: 20 10 available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 5111 MB node 0 free: 4317 MB node 1 cpus: 1 3 node 1 size: 5120 MB node 1 free: 876 MB node distances: node 0 1 0: 10 20 1: 20 10 After: # numactl -H /qemu/x86_64-softmmu/qemu-system-x86_64 -m 4096 -smp 4 -numa node,nodeid=0,cpus=0,cpus=2 -numa mem,size=2048M,policy=membind,host-nodes=0 -numa node,nodeid=0,cpus=1,cpus=3 -numa mem,size=2048M,policy=membind,host-nodes=1 -hda 6u4ga2.qcow2 -enable-kvm -device pci-assign,host=07:00.1,id=hostdev0,bus=pci.0,addr=0x7 sleep 40 numactl -H [1] 10862 available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 5111 MB node 0 free: 4718 MB node 1 cpus: 1 3 node 1 size: 5120 MB node 1 free: 4799 MB node distances: node 0 1 0: 10 20 1: 20 10 available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 5111 MB node 0 free: 2544 MB node 1 cpus: 1 3 node 1 size: 5120 MB node 1 free: 2725 MB node distances: node 0 1 0: 10 20 1: 20 10 === V1-V2: change to use QemuOpts in numa options (Paolo) handle Error in mpol parser (Paolo) change qmp command format to mem-policy=membind,mem-hostnode=0-1 like (Paolo) V2-V3: also handle Error in cpus parser (5/10) split out common parser from cpus and hostnode parser (Bandan 6/10) V3-V4: rebase to request for comments V4-V5: use OptVisitor and split -numa option (Paolo) - s/set-mpol/set-mem-policy (Andreas) - s/mem-policy/policy - s/mem-hostnode/host-nodes fix hmp command process after error (Luiz) add qmp command query-numa and convert info numa to it (Luiz) V5-V6: remove tabs in json file (Laszlo, Paolo) add back -numa node,mem=xxx as legacy (Paolo) change cpus and host-nodes to array (Laszlo, Eric) change nodeid to uint16 add NumaMemPolicy enum type (Eric) rebased on Laszlo's OptsVisitor: support / flatten integer ranges for repeating options patch set, thanks for Laszlo's help V6-V7: change UInt16 to uint16 (Laszlo) fix a typo in adding qmp command set-mem-policy V7-V8: rebase to current master with Laszlo's V2 of OptsVisitor patch set fix an adding white space line error V8-V9: rebase to current master check if total numa memory size is equal to ram_size (Paolo) add comments to the OptsVisitor stuff in qapi-schema.json (Eric, Laszlo) replace the use of numa_num_configured_nodes() (Andrew) avoid abusing the fact i==nodeid (Andrew) V9-V10: rebase to current master remove libnuma (Andrew) MAX_NODES=64 - MAX_NODES=128 since libnuma selected 128 (Andrew) use MAX_NODES instead of MAX_CPUMASK_BITS for host_mem bitmap (Andrew) remove a useless clear_bit() operation (Andrew) V10-V11: rebase to current master fix maxnode argument of mbind(2) V11-V12: rebase to current master split patch 02/11 of V11 (Eduardo) add some max value check (Eduardo) split MAX_NODES change patch (Eduardo) V12-V13: rebase to current master thanks for Luiz's review (Luiz) doc hmp command
[Qemu-devel] [PATCH V17 02/11] NUMA: check if the total numa memory size is equal to ram_size
If the total number of the assigned numa nodes memory is not equal to the assigned ram size, it will write the wrong data to ACPI talb, then the guest will ignore the wrong ACPI table and recognize all memory to one node. It's buggy, we should check it to ensure that we write the right data to ACPI table. Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- numa.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/numa.c b/numa.c index ce7736a..beda80e 100644 --- a/numa.c +++ b/numa.c @@ -150,6 +150,16 @@ void set_numa_nodes(void) node_mem[i] = ram_size - usedmem; } +uint64_t numa_total = 0; +for (i = 0; i nb_numa_nodes; i++) { +numa_total += node_mem[i]; +} +if (numa_total != ram_size) { +fprintf(stderr, qemu: numa nodes total memory size +should equal to ram_size\n); +exit(1); +} + for (i = 0; i nb_numa_nodes; i++) { if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { break; -- 1.8.5
[Qemu-devel] [PATCH V17 10/11] NUMA: add qmp command query-numa
Add qmp command query-numa to show guest NUMA information. Reviewed-by: Luiz Capitulino lcapitul...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- numa.c | 66 qapi-schema.json | 36 +++ qmp-commands.hx | 49 + 3 files changed, 151 insertions(+) diff --git a/numa.c b/numa.c index 43bba42..2954709 100644 --- a/numa.c +++ b/numa.c @@ -28,6 +28,7 @@ #include qapi/opts-visitor.h #include qapi/dealloc-visitor.h #include exec/memory.h +#include qmp-commands.h #ifdef __linux__ #include sys/syscall.h @@ -340,3 +341,68 @@ void set_numa_modes(void) } } } + +NUMANodeList *qmp_query_numa(Error **errp) +{ +NUMANodeList *head = NULL, *cur_item = NULL; +CPUState *cpu; +int i; + +for (i = 0; i nb_numa_nodes; i++) { +NUMANodeList *info; +uint16List *cur_cpu_item = NULL; +info = g_malloc0(sizeof(*info)); +info-value = g_malloc0(sizeof(*info-value)); +info-value-nodeid = i; +CPU_FOREACH(cpu) { +if (cpu-numa_node == i) { +uint16List *node_cpu = g_malloc0(sizeof(*node_cpu)); +node_cpu-value = cpu-cpu_index; + +if (!cur_cpu_item) { +info-value-cpus = cur_cpu_item = node_cpu; +} else { +cur_cpu_item-next = node_cpu; +cur_cpu_item = node_cpu; +} +} +} +info-value-memory = numa_info[i].node_mem; + +#ifdef __linux__ +info-value-policy = numa_info[i].policy; +info-value-relative = numa_info[i].relative; + +unsigned long first, next; +next = first = find_first_bit(numa_info[i].host_mem, MAX_NODES); +if (first == MAX_NODES) { +goto end; +} +uint16List *cur_node_item = g_malloc0(sizeof(*cur_node_item)); +cur_node_item-value = first; +info-value-host_nodes = cur_node_item; +do { +next = find_next_bit(numa_info[i].host_mem, MAX_NODES, + next + 1); +if (next == MAX_NODES) { +break; +} + +uint16List *host_node = g_malloc0(sizeof(*host_node)); +host_node-value = next; +cur_node_item-next = host_node; +cur_node_item = host_node; +} while (true); +end: +#endif + +if (!cur_item) { +head = cur_item = info; +} else { +cur_item-next = info; +cur_item = info; +} +} + +return head; +} diff --git a/qapi-schema.json b/qapi-schema.json index c0dad81..af947e2 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4289,3 +4289,39 @@ '*policy': 'NumaNodePolicy', '*relative': 'bool', '*host-nodes': ['uint16'] }} + +## +# @NUMANode: +# +# Information of guest NUMA node +# +# @nodeid: NUMA node ID +# +# @cpus: VCPUs contained in this node +# +# @memory: memory size of this node +# +# @policy: memory policy of this node +# +# @relative: if host nodes are relative for memory policy +# +# @host-nodes: host nodes for its memory policy +# +# Since: 1.7 +# +## +{ 'type': 'NUMANode', + 'data': {'nodeid': 'uint16', 'cpus': ['uint16'], 'memory': 'uint64', + 'policy': 'NumaNodePolicy', 'relative': 'bool', + 'host-nodes': ['uint16'] }} + +## +# @query-numa: +# +# Returns a list of information about each guest node. +# +# Returns: a list of @NUMANode for each guest node +# +# Since: 1.7 +## +{ 'command': 'query-numa', 'returns': ['NUMANode'] } diff --git a/qmp-commands.hx b/qmp-commands.hx index fba15cd..c2bc508 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3295,3 +3295,52 @@ Example (2): - { return: {} } EQMP + +{ +.name = query-numa, +.args_type = , +.mhandler.cmd_new = qmp_marshal_input_query_numa, +}, + +SQMP +query-numa +- + +Show NUMA information. + +Return a json-array. Each NUMA node is represented by a json-object, +which contains: + +- nodeid: NUMA node ID (json-int) +- cpus: a json-arry of contained VCPUs +- memory: amount of memory in each node in Byte (json-int) +- policy: memory policy of this node (json-string) +- relative: if host nodes is relative for its memory policy (json-bool) +- host-nodes: a json-array of host nodes for its memory policy + +Arguments: + +Example: + +- { excute: query-numa } +- { return:[ +{ +nodeid: 0, +cpus: [0, 1], +memory: 536870912, +policy: membind, +relative: false, +host-nodes: [0, 1] +}, +{ +nodeid: 1, +cpus: [2, 3], +memory: 536870912, +policy: interleave, +relative: false, +host-nodes: [1] +} + ] + } + +EQMP -- 1.8.5
[Qemu-devel] [PATCH V17 09/11] NUMA: set guest numa nodes memory policy
Set the guest numa nodes memory policies using the mbind(2) system call node by node. After this patch, we are able to set guest nodes memory policies through the QEMU options, this arms to solve the guest cross nodes memory access performance issue. And as you all know, if PCI-passthrough is used, direct-attached-device uses DMA transfer between device and qemu process. All pages of the guest will be pinned by get_user_pages(). KVM_ASSIGN_PCI_DEVICE ioctl kvm_vm_ioctl_assign_device() =kvm_assign_device() = kvm_iommu_map_memslots() = kvm_iommu_map_pages() = kvm_pin_pages() So, with direct-attached-device, all guest page's page count will be +1 and any page migration will not work. AutoNUMA won't too. So, we should set the guest nodes memory allocation policies before the pages are really mapped. Signed-off-by: Andre Przywara andre.przyw...@amd.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- hw/i386/pc.c | 9 + include/exec/memory.h | 15 numa.c| 99 +++ 3 files changed, 123 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 74c1f16..07553f2 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1178,6 +1178,10 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, memory_region_init_alias(ram_below_4g, NULL, ram-below-4g, ram, 0, below_4g_mem_size); memory_region_add_subregion(system_memory, 0, ram_below_4g); +if (memory_region_set_mem_policy(ram_below_4g, 0, below_4g_mem_size, 0)) { +fprintf(stderr, qemu: set below 4g memory policy failed\n); +exit(1); +} e820_add_entry(0, below_4g_mem_size, E820_RAM); if (above_4g_mem_size 0) { ram_above_4g = g_malloc(sizeof(*ram_above_4g)); @@ -1185,6 +1189,11 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, below_4g_mem_size, above_4g_mem_size); memory_region_add_subregion(system_memory, 0x1ULL, ram_above_4g); +if (memory_region_set_mem_policy(ram_above_4g, 0, above_4g_mem_size, + below_4g_mem_size)) { +fprintf(stderr, qemu: set above 4g memory policy failed\n); +exit(1); +} e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM); } diff --git a/include/exec/memory.h b/include/exec/memory.h index 480dfbf..33de50a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -905,6 +905,21 @@ void memory_region_transaction_begin(void); void memory_region_transaction_commit(void); /** + * memory_region_set_mem_policy: Set memory policy + * + * Set the memory policy for the specified area. + * + * @mr: a MemoryRegion we are setting memory policy for + * @start: the start offset of the specific region in this MemoryRegion + * @length: the specific memory area length + * @offset: the start offset of the specific area in NUMA setting + */ +int memory_region_set_mem_policy(MemoryRegion *mr, + ram_addr_t start, + ram_addr_t length, + ram_addr_t offset); + +/** * memory_listener_register: register callbacks to be called when memory * sections are mapped or unmapped into an address * space diff --git a/numa.c b/numa.c index da4dbbd..43bba42 100644 --- a/numa.c +++ b/numa.c @@ -27,6 +27,16 @@ #include qapi-visit.h #include qapi/opts-visitor.h #include qapi/dealloc-visitor.h +#include exec/memory.h + +#ifdef __linux__ +#include sys/syscall.h +#ifndef MPOL_F_RELATIVE_NODES +#define MPOL_F_RELATIVE_NODES (1 14) +#define MPOL_F_STATIC_NODES (1 15) +#endif +#endif + QemuOptsList qemu_numa_opts = { .name = numa, .implied_opt_name = type, @@ -228,6 +238,95 @@ void set_numa_nodes(void) } } +#ifdef __linux__ +static int node_parse_bind_mode(unsigned int nodeid) +{ +int bind_mode; + +switch (numa_info[nodeid].policy) { +case NUMA_NODE_POLICY_DEFAULT: +case NUMA_NODE_POLICY_PREFERRED: +case NUMA_NODE_POLICY_MEMBIND: +case NUMA_NODE_POLICY_INTERLEAVE: +bind_mode = numa_info[nodeid].policy; +break; +default: +bind_mode = NUMA_NODE_POLICY_DEFAULT; +return bind_mode; +} + +bind_mode |= numa_info[nodeid].relative ? +MPOL_F_RELATIVE_NODES : MPOL_F_STATIC_NODES; + +return bind_mode; +} + +static int node_set_mem_policy(void *ram_ptr, ram_addr_t length, int nodeid) +{ +int bind_mode = node_parse_bind_mode(nodeid); +unsigned long *nodes = numa_info[nodeid].host_mem; + +/* This is a workaround for a long standing bug in Linux' + * mbind implementation, which cuts off the last specified + * node. To stay compatible should this bug be fixed, we + * specify one more node and zero this one out. +
[Qemu-devel] [PATCH V17 06/11] NUMA: add -numa mem, options
Add -numa mem, option like following as Paolo suggested: -numa mem,nodeid=0,size=1G This new option will make later coming memory hotplug better. We will use the new options to specify nodes memory info, and just remain -numa node,mem=xx as legacy. Reviewed-by: Laszlo Ersek ler...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- include/sysemu/sysemu.h | 1 + numa.c | 36 qemu-options.hx | 6 -- vl.c| 2 ++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 20b05a3..291aa6a 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -135,6 +135,7 @@ extern QEMUClockType rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; +extern int nb_numa_mem_nodes; typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); diff --git a/numa.c b/numa.c index c4fa665..c676c5e 100644 --- a/numa.c +++ b/numa.c @@ -74,6 +74,31 @@ static int numa_node_parse(NumaNodeOptions *opts) return 0; } +static int numa_mem_parse(NumaMemOptions *opts) +{ +uint16_t nodenr; +uint64_t mem_size; + +if (opts-has_nodeid) { +nodenr = opts-nodeid; +} else { +nodenr = nb_numa_mem_nodes; +} + +if (nodenr = MAX_NODES) { +fprintf(stderr, qemu: Max number of NUMA nodes reached: % +PRIu16 \n, nodenr); +return -1; +} + +if (opts-has_size) { +mem_size = opts-size; +numa_info[nodenr].node_mem = mem_size; +} + +return 0; +} + int numa_init_func(QemuOpts *opts, void *opaque) { NumaOptions *object = NULL; @@ -101,6 +126,13 @@ int numa_init_func(QemuOpts *opts, void *opaque) } nb_numa_nodes++; break; +case NUMA_OPTIONS_KIND_MEM: +ret = numa_mem_parse(object-mem); +if (ret) { +goto error; +} +nb_numa_mem_nodes++; +break; default: fprintf(stderr, qemu: Invalid NUMA options type.\n); ret = -1; @@ -119,6 +151,10 @@ error: void set_numa_nodes(void) { +if (nb_numa_mem_nodes nb_numa_nodes) { +nb_numa_nodes = nb_numa_mem_nodes; +} + if (nb_numa_nodes 0) { int i; diff --git a/qemu-options.hx b/qemu-options.hx index 8b94264..e6afb6f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -95,11 +95,13 @@ specifies the maximum number of hotpluggable CPUs. ETEXI DEF(numa, HAS_ARG, QEMU_OPTION_numa, --numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n, QEMU_ARCH_ALL) +-numa node[,nodeid=node][,cpus=cpu[-cpu]]\n +-numa mem[,nodeid=node][,size=size]\n +, QEMU_ARCH_ALL) STEXI @item -numa @var{opts} @findex -numa -Simulate a multi node NUMA system. If mem and cpus are omitted, resources +Simulate a multi node NUMA system. If @var{size} and @var{cpus} are omitted, resources are split equally. ETEXI diff --git a/vl.c b/vl.c index e67f34a..064b821 100644 --- a/vl.c +++ b/vl.c @@ -250,6 +250,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order); int nb_numa_nodes; +int nb_numa_mem_nodes; NodeInfo numa_info[MAX_NODES]; uint8_t qemu_uuid[16]; @@ -2817,6 +2818,7 @@ int main(int argc, char **argv, char **envp) } nb_numa_nodes = 0; +nb_numa_mem_nodes = 0; nb_nics = 0; bdrv_init_with_whitelist(); -- 1.8.5
[Qemu-devel] [PATCH V17 07/11] NUMA: expand MAX_NODES from 64 to 128
libnuma choosed 128 for MAX_NODES, so we follow libnuma here. Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- include/sysemu/sysemu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 291aa6a..807619e 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -132,7 +132,7 @@ extern size_t boot_splash_filedata_size; extern uint8_t qemu_extra_params_fw[2]; extern QEMUClockType rtc_clock; -#define MAX_NODES 64 +#define MAX_NODES 128 #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; extern int nb_numa_mem_nodes; -- 1.8.5
[Qemu-devel] [PATCH V17 04/11] NUMA: convert -numa option to use OptsVisitor
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- include/sysemu/sysemu.h | 3 +- numa.c | 148 +++- qapi-schema.json| 30 ++ vl.c| 11 +++- 4 files changed, 114 insertions(+), 78 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index d873b42..20b05a3 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -140,9 +140,10 @@ typedef struct node_info { DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; -void numa_add(const char *optarg); void set_numa_nodes(void); void set_numa_modes(void); +extern QemuOptsList qemu_numa_opts; +int numa_init_func(QemuOpts *opts, void *opaque); #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/numa.c b/numa.c index 1bc0fad..c4fa665 100644 --- a/numa.c +++ b/numa.c @@ -24,101 +24,97 @@ */ #include sysemu/sysemu.h - -static void numa_node_parse_cpus(int nodenr, const char *cpus) +#include qapi-visit.h +#include qapi/opts-visitor.h +#include qapi/dealloc-visitor.h +QemuOptsList qemu_numa_opts = { +.name = numa, +.implied_opt_name = type, +.head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head), +.desc = { { 0 } } /* validated with OptsVisitor */ +}; + +static int numa_node_parse(NumaNodeOptions *opts) { -char *endptr; -unsigned long long value, endvalue; - -/* Empty CPU range strings will be considered valid, they will simply - * not set any bit in the CPU bitmap. - */ -if (!*cpus) { -return; -} +uint16_t nodenr; +uint16List *cpus = NULL; -if (parse_uint(cpus, value, endptr, 10) 0) { -goto error; -} -if (*endptr == '-') { -if (parse_uint_full(endptr + 1, endvalue, 10) 0) { -goto error; -} -} else if (*endptr == '\0') { -endvalue = value; +if (opts-has_nodeid) { +nodenr = opts-nodeid; } else { -goto error; +nodenr = nb_numa_nodes; } -if (endvalue = MAX_CPUMASK_BITS) { -endvalue = MAX_CPUMASK_BITS - 1; -fprintf(stderr, -qemu: NUMA: A max of %d VCPUs are supported\n, - MAX_CPUMASK_BITS); +if (nodenr = MAX_NODES) { +fprintf(stderr, qemu: Max number of NUMA nodes reached: % +PRIu16 \n, nodenr); +return -1; } -if (endvalue value) { -goto error; +for (cpus = opts-cpus; cpus; cpus = cpus-next) { +if (cpus-value MAX_CPUMASK_BITS) { +fprintf(stderr, qemu: cpu number % PRIu16 is bigger than %d, +cpus-value, MAX_CPUMASK_BITS); +continue; +} +bitmap_set(numa_info[nodenr].node_cpu, cpus-value, 1); } -bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1); -return; +if (opts-has_mem) { +int64_t mem_size; +char *endptr; +mem_size = strtosz(opts-mem, endptr); +if (mem_size 0 || *endptr) { +fprintf(stderr, qemu: invalid numa mem size: %s\n, opts-mem); +return -1; +} +numa_info[nodenr].node_mem = mem_size; +} -error: -fprintf(stderr, qemu: Invalid NUMA CPU range: %s\n, cpus); -exit(1); +return 0; } -void numa_add(const char *optarg) +int numa_init_func(QemuOpts *opts, void *opaque) { -char option[128]; -char *endptr; -unsigned long long nodenr; - -optarg = get_opt_name(option, 128, optarg, ','); -if (*optarg == ',') { -optarg++; +NumaOptions *object = NULL; +Error *err = NULL; +int ret = 0; + +{ +OptsVisitor *ov = opts_visitor_new(opts); +visit_type_NumaOptions(opts_get_visitor(ov), object, NULL, err); +opts_visitor_cleanup(ov); } -if (!strcmp(option, node)) { - -if (nb_numa_nodes = MAX_NODES) { -fprintf(stderr, qemu: too many NUMA nodes\n); -exit(1); -} -if (get_param_value(option, 128, nodeid, optarg) == 0) { -nodenr = nb_numa_nodes; -} else { -if (parse_uint_full(option, nodenr, 10) 0) { -fprintf(stderr, qemu: Invalid NUMA nodeid: %s\n, option); -exit(1); -} -} - -if (nodenr = MAX_NODES) { -fprintf(stderr, qemu: invalid NUMA nodeid: %llu\n, nodenr); -exit(1); -} +if (error_is_set(err)) { +fprintf(stderr, qemu: %s\n, error_get_pretty(err)); +error_free(err); +ret = -1; +goto error; +} -if (get_param_value(option, 128, mem, optarg) == 0) { -numa_info[nodenr].node_mem = 0; -} else { -int64_t sval; -sval = strtosz(option, endptr); -if (sval 0 || *endptr) { -fprintf(stderr, qemu: invalid numa mem size: %s\n, optarg); -exit(1); -
[Qemu-devel] [PATCH V17 03/11] NUMA: Add numa_info structure to contain numa nodes info
Add the numa_info structure to contain the numa nodes memory, VCPUs information and the future added numa nodes host memory policies. Reviewed-by: Eduardo Habkost ehabk...@redhat.com Signed-off-by: Andre Przywara andre.przyw...@amd.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- hw/i386/pc.c| 12 include/sysemu/sysemu.h | 8 ++-- monitor.c | 2 +- numa.c | 23 --- vl.c| 7 +++ 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 12c436e..74c1f16 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -670,14 +670,14 @@ static FWCfgState *bochs_bios_init(void) unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id apic_id_limit); for (j = 0; j nb_numa_nodes; j++) { -if (test_bit(i, node_cpumask[j])) { +if (test_bit(i, numa_info[j].node_cpu)) { numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i nb_numa_nodes; i++) { -numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); +numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, (1 + apic_id_limit + nb_numa_nodes) * @@ -1072,8 +1072,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, guest_info-apic_id_limit = pc_apic_id_limit(max_cpus); guest_info-apic_xrupt_override = kvm_allows_irq0_override(); guest_info-numa_nodes = nb_numa_nodes; -guest_info-node_mem = g_memdup(node_mem, guest_info-numa_nodes * +guest_info-node_mem = g_malloc0(guest_info-numa_nodes * sizeof *guest_info-node_mem); +for (i = 0; i nb_numa_nodes; i++) { +guest_info-node_mem[i] = numa_info[i].node_mem; +} + guest_info-node_cpu = g_malloc0(guest_info-apic_id_limit * sizeof *guest_info-node_cpu); @@ -1081,7 +1085,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id guest_info-apic_id_limit); for (j = 0; j nb_numa_nodes; j++) { -if (test_bit(i, node_cpumask[j])) { +if (test_bit(i, numa_info[j].node_cpu)) { guest_info-node_cpu[apic_id] = j; break; } diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 2509649..d873b42 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -9,6 +9,7 @@ #include qapi-types.h #include qemu/notify.h #include qemu/main-loop.h +#include qemu/bitmap.h /* vl.c */ @@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; -extern uint64_t node_mem[MAX_NODES]; -extern unsigned long *node_cpumask[MAX_NODES]; +typedef struct node_info { +uint64_t node_mem; +DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); +} NodeInfo; +extern NodeInfo numa_info[MAX_NODES]; void numa_add(const char *optarg); void set_numa_nodes(void); void set_numa_modes(void); diff --git a/monitor.c b/monitor.c index 845f608..b97b7d3 100644 --- a/monitor.c +++ b/monitor.c @@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict) } monitor_printf(mon, \n); monitor_printf(mon, node %d size: % PRId64 MB\n, i, -node_mem[i] 20); +numa_info[i].node_mem 20); } } diff --git a/numa.c b/numa.c index beda80e..1bc0fad 100644 --- a/numa.c +++ b/numa.c @@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus) goto error; } -bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); +bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1); return; error: @@ -101,7 +101,7 @@ void numa_add(const char *optarg) } if (get_param_value(option, 128, mem, optarg) == 0) { -node_mem[nodenr] = 0; +numa_info[nodenr].node_mem = 0; } else { int64_t sval; sval = strtosz(option, endptr); @@ -109,7 +109,7 @@ void numa_add(const char *optarg) fprintf(stderr, qemu: invalid numa mem size: %s\n, optarg); exit(1); } -node_mem[nodenr] = sval; +numa_info[nodenr].node_mem = sval; } if (get_param_value(option, 128, cpus, optarg) != 0) { numa_node_parse_cpus(nodenr, option); @@ -134,7 +134,7 @@ void set_numa_nodes(void) * and distribute the available memory equally across all nodes */ for (i = 0; i nb_numa_nodes; i++) { -if (node_mem[i] != 0) +if (numa_info[i].node_mem != 0) break; }
[Qemu-devel] [PATCH V17 08/11] NUMA: parse guest numa nodes memory policy
The memory policy setting format is like: policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N And we are adding this setting as a suboption of -numa mem,, the memory policy then can be set like following: -numa node,nodeid=0,cpus=0 \ -numa node,nodeid=1,cpus=1 \ -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \ -numa mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1 Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- include/sysemu/sysemu.h | 3 +++ numa.c | 18 ++ qapi-schema.json| 33 +++-- vl.c| 3 +++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 807619e..82f1447 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -139,6 +139,9 @@ extern int nb_numa_mem_nodes; typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); +DECLARE_BITMAP(host_mem, MAX_NODES); +NumaNodePolicy policy; +bool relative; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; void set_numa_nodes(void); diff --git a/numa.c b/numa.c index c676c5e..da4dbbd 100644 --- a/numa.c +++ b/numa.c @@ -78,6 +78,7 @@ static int numa_mem_parse(NumaMemOptions *opts) { uint16_t nodenr; uint64_t mem_size; +uint16List *nodes; if (opts-has_nodeid) { nodenr = opts-nodeid; @@ -96,6 +97,23 @@ static int numa_mem_parse(NumaMemOptions *opts) numa_info[nodenr].node_mem = mem_size; } +if (opts-has_policy) { +numa_info[nodenr].policy = opts-policy; +} + +if (opts-has_relative) { +numa_info[nodenr].relative = opts-relative; +} + +for (nodes = opts-host_nodes; nodes; nodes = nodes-next) { +if (nodes-value MAX_NODES) { +fprintf(stderr, qemu: node number % PRIu16 is bigger than %d\n, +nodes-value, MAX_NODES); +continue; +} +bitmap_set(numa_info[nodenr].host_mem, nodes-value, 1); +} + return 0; } diff --git a/qapi-schema.json b/qapi-schema.json index 1043e57..c0dad81 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4246,6 +4246,26 @@ '*mem':'str' }} ## +# @NumaNodePolicy +# +# NUMA node policy types +# +# @default: restore default policy, remove any nondefault policy +# +# @preferred: set the preferred node for allocation +# +# @membind: a strict policy that restricts memory allocation to the +# nodes specified +# +# @interleave: the page allocations is interleaved across the set +# of nodes specified +# +# Since 1.7 +## +{ 'enum': 'NumaNodePolicy', + 'data': [ 'default', 'preferred', 'membind', 'interleave' ] } + +## # @NumaMemOptions # # Set memory information of guest NUMA node. (for OptsVisitor) @@ -4254,9 +4274,18 @@ # # @size: #optional memory size of this node # +# @policy: #optional memory policy of this node +# +# @relative: #optional if the nodes specified are relative +# +# @host-nodes: #optional host nodes for its memory policy +# # Since 1.7 ## { 'type': 'NumaMemOptions', 'data': { - '*nodeid': 'uint16', - '*size': 'size' }} + '*nodeid': 'uint16', + '*size': 'size', + '*policy': 'NumaNodePolicy', + '*relative': 'bool', + '*host-nodes': ['uint16'] }} diff --git a/vl.c b/vl.c index 064b821..95d03f5 100644 --- a/vl.c +++ b/vl.c @@ -2815,6 +2815,9 @@ int main(int argc, char **argv, char **envp) for (i = 0; i MAX_NODES; i++) { numa_info[i].node_mem = 0; bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); +bitmap_zero(numa_info[i].host_mem, MAX_NODES); +numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT; +numa_info[i].relative = false; } nb_numa_nodes = 0; -- 1.8.5
[Qemu-devel] [PATCH V17 05/11] NUMA: introduce NumaMemOptions
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- qapi-schema.json | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/qapi-schema.json b/qapi-schema.json index db539b6..1043e57 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4223,7 +4223,8 @@ ## { 'union': 'NumaOptions', 'data': { -'node': 'NumaNodeOptions' }} +'node': 'NumaNodeOptions', +'mem' : 'NumaMemOptions' }} ## # @NumaNodeOptions @@ -4243,3 +4244,19 @@ '*nodeid': 'uint16', '*cpus': ['uint16'], '*mem':'str' }} + +## +# @NumaMemOptions +# +# Set memory information of guest NUMA node. (for OptsVisitor) +# +# @nodeid: #optional NUMA node ID +# +# @size: #optional memory size of this node +# +# Since 1.7 +## +{ 'type': 'NumaMemOptions', + 'data': { + '*nodeid': 'uint16', + '*size': 'size' }} -- 1.8.5
[Qemu-devel] [PATCH V17 11/11] NUMA: convert hmp command info_numa to use qmp command query_numa
Reviewed-by: Luiz Capitulino lcapitul...@redhat.com Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com --- hmp.c | 57 + hmp.h | 1 + monitor.c | 21 + 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/hmp.c b/hmp.c index 32ee285..d6dedd2 100644 --- a/hmp.c +++ b/hmp.c @@ -24,6 +24,10 @@ #include ui/console.h #include block/qapi.h #include qemu-io.h +#include qapi-visit.h +#include qapi/opts-visitor.h +#include qapi/dealloc-visitor.h +#include sysemu/sysemu.h static void hmp_handle_error(Monitor *mon, Error **errp) { @@ -1564,3 +1568,56 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, err); } + +void hmp_info_numa(Monitor *mon, const QDict *qdict) +{ +NUMANodeList *node_list, *node; +uint16List *head; +int nodeid; +char *policy_str = NULL; + +node_list = qmp_query_numa(NULL); + +monitor_printf(mon, %d nodes\n, nb_numa_nodes); +for (node = node_list; node; node = node-next) { +nodeid = node-value-nodeid; +monitor_printf(mon, node %d cpus:, nodeid); +head = node-value-cpus; +for (head = node-value-cpus; head != NULL; head = head-next) { +monitor_printf(mon, %d, (int)head-value); +} +monitor_printf(mon, \n); +monitor_printf(mon, node %d size: % PRId64 MB\n, + nodeid, node-value-memory 20); +switch (node-value-policy) { +case NUMA_NODE_POLICY_DEFAULT: +policy_str = g_strdup(default); +break; +case NUMA_NODE_POLICY_PREFERRED: +policy_str = g_strdup(preferred); +break; +case NUMA_NODE_POLICY_MEMBIND: +policy_str = g_strdup(membind); +break; +case NUMA_NODE_POLICY_INTERLEAVE: +policy_str = g_strdup(interleave); +break; +default: +break; +} +monitor_printf(mon, node %d policy: %s\n, + nodeid, policy_str ? : ); +if (policy_str) { +free(policy_str); +} +monitor_printf(mon, node %d relative: %s\n, nodeid, + node-value-relative ? true : false); +monitor_printf(mon, node %d host-nodes:, nodeid); +for (head = node-value-host_nodes; head != NULL; head = head-next) { +monitor_printf(mon, %d, (int)head-value); +} +monitor_printf(mon, \n); +} + +qapi_free_NUMANodeList(node_list); +} diff --git a/hmp.h b/hmp.h index 54cf71f..4f8d39b 100644 --- a/hmp.h +++ b/hmp.h @@ -37,6 +37,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict); void hmp_info_pci(Monitor *mon, const QDict *qdict); void hmp_info_block_jobs(Monitor *mon, const QDict *qdict); void hmp_info_tpm(Monitor *mon, const QDict *qdict); +void hmp_info_numa(Monitor *mon, const QDict *qdict); void hmp_quit(Monitor *mon, const QDict *qdict); void hmp_stop(Monitor *mon, const QDict *qdict); void hmp_system_reset(Monitor *mon, const QDict *qdict); diff --git a/monitor.c b/monitor.c index b97b7d3..f747a48 100644 --- a/monitor.c +++ b/monitor.c @@ -1989,25 +1989,6 @@ static void do_info_mtree(Monitor *mon, const QDict *qdict) mtree_info((fprintf_function)monitor_printf, mon); } -static void do_info_numa(Monitor *mon, const QDict *qdict) -{ -int i; -CPUState *cpu; - -monitor_printf(mon, %d nodes\n, nb_numa_nodes); -for (i = 0; i nb_numa_nodes; i++) { -monitor_printf(mon, node %d cpus:, i); -CPU_FOREACH(cpu) { -if (cpu-numa_node == i) { -monitor_printf(mon, %d, cpu-cpu_index); -} -} -monitor_printf(mon, \n); -monitor_printf(mon, node %d size: % PRId64 MB\n, i, -numa_info[i].node_mem 20); -} -} - #ifdef CONFIG_PROFILER int64_t qemu_time; @@ -2775,7 +2756,7 @@ static mon_cmd_t info_cmds[] = { .args_type = , .params = , .help = show NUMA information, -.mhandler.cmd = do_info_numa, +.mhandler.cmd = hmp_info_numa, }, { .name = usb, -- 1.8.5
[Qemu-devel] [PATCH v8 1/5] vmstate: add VMSTATE_PTIMER_ARRAY
Signed-off-by: liguang lig.f...@cn.fujitsu.com --- include/migration/vmstate.h |4 savevm.c| 31 +++ 2 files changed, 35 insertions(+), 0 deletions(-) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 9d09e60..977cf52 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -165,6 +165,7 @@ extern const VMStateInfo vmstate_info_timer; extern const VMStateInfo vmstate_info_buffer; extern const VMStateInfo vmstate_info_unused_buffer; extern const VMStateInfo vmstate_info_bitmap; +extern const VMStateInfo vmstate_info_ptimer; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) @@ -613,6 +614,9 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_TIMER_ARRAY(_f, _s, _n) \ VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *) +#define VMSTATE_PTIMER_ARRAY(_f, _s, _n)\ +VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_ptimer, ptimer_state *) + #define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool) diff --git a/savevm.c b/savevm.c index 2f631d4..54dbb33 100644 --- a/savevm.c +++ b/savevm.c @@ -30,6 +30,7 @@ #include monitor/monitor.h #include sysemu/sysemu.h #include qemu/timer.h +#include hw/ptimer.h #include audio/audio.h #include migration/migration.h #include qemu/sockets.h @@ -1362,6 +1363,36 @@ const VMStateInfo vmstate_info_timer = { .put = put_timer, }; +static int get_ptimer(QEMUFile *f, void *pv, size_t size) +{ +ptimer_state *v = pv; +uint64_t count; + +count = qemu_get_be64(f); +if (count != -1) { +ptimer_set_count(v, count); +} else { +ptimer_stop(v); +} + +return 0; +} + +static void put_ptimer(QEMUFile *f, void *pv, size_t size) +{ +ptimer_state *v = pv; +uint64_t count; + +count = ptimer_get_count(v); +qemu_put_be64(f, count); +} + +const VMStateInfo vmstate_info_ptimer = { +.name = ptimer, +.get = get_ptimer, +.put = put_ptimer, +}; + /* uint8_t buffers */ static int get_buffer(QEMUFile *f, void *pv, size_t size) -- 1.7.2.5
[Qemu-devel] [PATCH v8 2/5] hw/timer: add allwinner a10 timer
Signed-off-by: liguang lig.f...@cn.fujitsu.com --- default-configs/arm-softmmu.mak |2 + hw/timer/Makefile.objs |2 + hw/timer/allwinner-a10-pit.c | 253 ++ include/hw/timer/allwinner-a10-pit.h | 57 4 files changed, 314 insertions(+), 0 deletions(-) create mode 100644 hw/timer/allwinner-a10-pit.c create mode 100644 include/hw/timer/allwinner-a10-pit.h diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index a555eef..0029596 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -81,3 +81,5 @@ CONFIG_VERSATILE_I2C=y CONFIG_SDHCI=y CONFIG_INTEGRATOR_DEBUG=y + +CONFIG_ALLWINNER_A10=y diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs index eca5905..3020388 100644 --- a/hw/timer/Makefile.objs +++ b/hw/timer/Makefile.objs @@ -27,3 +27,5 @@ obj-$(CONFIG_SH4) += sh_timer.o obj-$(CONFIG_TUSB6010) += tusb6010.o obj-$(CONFIG_MC146818RTC) += mc146818rtc.o + +obj-$(CONFIG_ALLWINNER_A10) += allwinner-a10-pit.o diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c new file mode 100644 index 000..2305813 --- /dev/null +++ b/hw/timer/allwinner-a10-pit.c @@ -0,0 +1,253 @@ +/* + * Allwinner A10 timer device emulation + * + * Copyright (C) 2013 Li Guang + * Written by Li Guang lig.f...@cn.fujitsu.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include hw/sysbus.h +#include sysemu/sysemu.h +#include hw/timer/allwinner-a10-pit.h + + +static uint64_t a10_pit_read(void *opaque, hwaddr offset, unsigned size) +{ +AwA10PITState *s = AW_A10_PIT(opaque); +uint8_t index; + +switch (offset) { +case AW_A10_PIT_TIMER_IRQ_EN: +return s-irq_enable; +case AW_A10_PIT_TIMER_IRQ_ST: +return s-irq_status; +case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE * 6 + AW_A10_PIT_TIMER_COUNT: +index = offset 0xf0; +index = 4; +index -= 1; +switch (offset 0x0f) { +case AW_A10_PIT_TIMER_CONTROL: +return s-control[index]; +case AW_A10_PIT_TIMER_INTERVAL: +return s-interval[index]; +case AW_A10_PIT_TIMER_COUNT: +s-count[index] = ptimer_get_count(s-timer[index]); +return s-count[index]; +default: +qemu_log_mask(LOG_GUEST_ERROR, + %s: Bad offset 0x%x\n, __func__, (int)offset); +break; +} +case AW_A10_PIT_WDOG_CONTROL: +break; +case AW_A10_PIT_WDOG_MODE: +break; +case AW_A10_PIT_COUNT_LO: +return s-count_lo; +case AW_A10_PIT_COUNT_HI: +return s-count_hi; +case AW_A10_PIT_COUNT_CTL: +return s-count_ctl; +default: +qemu_log_mask(LOG_GUEST_ERROR, + %s: Bad offset 0x%x\n, __func__, (int)offset); +break; +} + +return 0; +} + +static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value, +unsigned size) +{ + AwA10PITState *s = AW_A10_PIT(opaque); + uint8_t index; + +switch (offset) { +case AW_A10_PIT_TIMER_IRQ_EN: +s-irq_enable = value; +break; +case AW_A10_PIT_TIMER_IRQ_ST: +s-irq_status = ~value; +break; +case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE * 6 + AW_A10_PIT_TIMER_COUNT: +index = offset 0xf0; +index = 4; +index -= 1; +switch (offset 0x0f) { +case AW_A10_PIT_TIMER_CONTROL: +s-control[index] = value; +if (s-control[index] AW_A10_PIT_TIMER_RELOAD) { +ptimer_set_count(s-timer[index], s-interval[index]); +} +if (s-control[index] AW_A10_PIT_TIMER_EN) { +int oneshot = 0; +if (s-control[index] AW_A10_PIT_TIMER_MODE) { +oneshot = 1; +} +ptimer_run(s-timer[index], oneshot); +} else { +ptimer_stop(s-timer[index]); +} +break; +case AW_A10_PIT_TIMER_INTERVAL: +s-interval[index] = value; +ptimer_set_limit(s-timer[index], s-interval[index], 1); +break; +case AW_A10_PIT_TIMER_COUNT: +s-count[index] = value; +break; +default: +qemu_log_mask(LOG_GUEST_ERROR, + %s: Bad offset 0x%x\n, __func__, (int)offset); +} +break; +case
[Qemu-devel] [PATCH v8 4/5] hw/arm: add allwinner a10 SoC support
Signed-off-by: liguang lig.f...@cn.fujitsu.com --- hw/arm/Makefile.objs |2 +- hw/arm/allwinner-a10.c | 77 include/hw/arm/allwinner-a10.h | 36 ++ 3 files changed, 114 insertions(+), 1 deletions(-) create mode 100644 hw/arm/allwinner-a10.c create mode 100644 include/hw/arm/allwinner-a10.h diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs index 3671b42..b9e5983 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o -obj-y += omap1.o omap2.o strongarm.o +obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c new file mode 100644 index 000..c4699b7 --- /dev/null +++ b/hw/arm/allwinner-a10.c @@ -0,0 +1,77 @@ +#include hw/sysbus.h +#include hw/devices.h +#include hw/arm/allwinner-a10.h + + +static void aw_a10_init(Object *obj) +{ +AwA10State *s = AW_A10(obj); +DeviceState *dev; + +object_initialize(s-cpu, sizeof(s-cpu), cortex-a8- TYPE_ARM_CPU); +object_property_add_child(obj, cpu, OBJECT(s-cpu), NULL); + +object_initialize(s-intc, sizeof(s-timer), TYPE_AW_A10_PIC); +dev = DEVICE(s-intc); +qdev_set_parent_bus(dev, sysbus_get_default()); + +object_initialize(s-timer, sizeof(s-timer), TYPE_AW_A10_PIT); +dev = DEVICE(s-timer); +qdev_set_parent_bus(dev, sysbus_get_default()); +} + +static void aw_a10_realize(DeviceState *dev, Error **errp) +{ +AwA10State *s = AW_A10(dev); +SysBusDevice *sysbusdev; +uint8_t i; +Error *err = NULL; + +object_property_set_bool(OBJECT(s-cpu), true, realized, err); +s-cpu_irq[0] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_IRQ); +s-cpu_irq[1] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_FIQ); + +object_property_set_bool(OBJECT(s-intc), true, realized, err); +sysbusdev = SYS_BUS_DEVICE(s-intc); +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIC_REG_BASE); +sysbus_connect_irq(sysbusdev, 0, s-cpu_irq[0]); +sysbus_connect_irq(sysbusdev, 1, s-cpu_irq[1]); +for (i = 0; i AW_A10_PIC_INT_NR; i++) { +s-irq[i] = qdev_get_gpio_in(DEVICE(s-intc), i); +} + +object_property_set_bool(OBJECT(s-timer), true, realized, err); +sysbusdev = SYS_BUS_DEVICE(s-timer); +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIT_REG_BASE); +sysbus_connect_irq(sysbusdev, 0, s-irq[22]); +sysbus_connect_irq(sysbusdev, 1, s-irq[23]); +sysbus_connect_irq(sysbusdev, 2, s-irq[24]); +sysbus_connect_irq(sysbusdev, 3, s-irq[25]); +sysbus_connect_irq(sysbusdev, 4, s-irq[67]); +sysbus_connect_irq(sysbusdev, 5, s-irq[68]); + +serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, s-irq[1], + 115200, serial_hds[0], DEVICE_NATIVE_ENDIAN); +} + +static void aw_a10_class_init(ObjectClass *oc, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(oc); + +dc-realize = aw_a10_realize; +} + +static const TypeInfo aw_a10_type_info = { +.name = TYPE_AW_A10, +.parent = TYPE_DEVICE, +.instance_size = sizeof(AwA10State), +.instance_init = aw_a10_init, +.class_init = aw_a10_class_init, +}; + +static void aw_a10_register_types(void) +{ +type_register_static(aw_a10_type_info); +} + +type_init(aw_a10_register_types) diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h new file mode 100644 index 000..a3e7b77 --- /dev/null +++ b/include/hw/arm/allwinner-a10.h @@ -0,0 +1,36 @@ +#ifndef ALLWINNER_H_ + +#include qemu-common.h +#include qemu/error-report.h +#include hw/char/serial.h +#include hw/arm/arm.h +#include hw/timer/allwinner-a10-pit.h +#include hw/intc/allwinner-a10-pic.h + +#include sysemu/sysemu.h +#include exec/address-spaces.h + + +#define AW_A10_PIC_REG_BASE 0x01c20400 +#define AW_A10_PIT_REG_BASE 0x01c20c00 +#define AW_A10_UART0_REG_BASE 0x01c28000 + +#define AW_A10_SDRAM_BASE 0x4000 + +#define TYPE_AW_A10 allwiner-a10 +#define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10) + +typedef struct AwA10State { +/* private */ +DeviceState parent_obj; +/* public */ + +ARMCPU cpu; +qemu_irq irq[AW_A10_PIC_INT_NR]; +qemu_irq cpu_irq[2]; +AwA10PITState timer; +AwA10PICState intc; +} AwA10State; + +#define ALLWINNER_H_ +#endif -- 1.7.2.5
[Qemu-devel] [PATCH v8 5/5] hw/arm: add cubieboard support
Signed-off-by: liguang lig.f...@cn.fujitsu.com --- hw/arm/Makefile.objs |2 +- hw/arm/cubieboard.c | 52 ++ 2 files changed, 53 insertions(+), 1 deletions(-) create mode 100644 hw/arm/cubieboard.c diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs index b9e5983..8be8d8e 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o -obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o +obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o cubieboard.o diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c new file mode 100644 index 000..0599061 --- /dev/null +++ b/hw/arm/cubieboard.c @@ -0,0 +1,52 @@ +#include hw/sysbus.h +#include hw/devices.h +#include hw/boards.h +#include hw/arm/allwinner-a10.h + + +static struct arm_boot_info cubieboard_binfo = { +.loader_start = AW_A10_SDRAM_BASE, +.board_id = 0x1008, +}; + +typedef struct CubieBoardState { +AwA10State *a10; +MemoryRegion sdram; +} CubieBoardState; + +static void cubieboard_init(QEMUMachineInitArgs *args) +{ +CubieBoardState *s = g_new(CubieBoardState, 1); +Error *err = NULL; + +s-a10 = AW_A10(object_new(TYPE_AW_A10)); +object_property_set_bool(OBJECT(s-a10), true, realized, err); +if (err != NULL) { +error_report(Couldn't realize Allwinner A10: %s\n, +error_get_pretty(err)); +exit(1); +} + +memory_region_init_ram(s-sdram, NULL, cubieboard.ram, args-ram_size); +vmstate_register_ram_global(s-sdram); +memory_region_add_subregion(get_system_memory(), AW_A10_SDRAM_BASE, +s-sdram); +cubieboard_binfo.ram_size = args-ram_size; +cubieboard_binfo.kernel_filename = args-kernel_filename; +cubieboard_binfo.kernel_cmdline = args-kernel_cmdline; +arm_load_kernel(s-a10-cpu, cubieboard_binfo); +} + +static QEMUMachine cubieboard_machine = { +.name = cubieboard, +.desc = cubietec cubieboard emulation, +.init = cubieboard_init, +}; + + +static void cubieboard_machine_init(void) +{ +qemu_register_machine(cubieboard_machine); +} + +machine_init(cubieboard_machine_init) -- 1.7.2.5
[Qemu-devel] [PATCH v8 3/5] hw/intc: add allwinner A10 interrupt controller
Signed-off-by: liguang lig.f...@cn.fujitsu.com --- hw/intc/Makefile.objs |1 + hw/intc/allwinner-a10-pic.c | 218 +++ include/hw/intc/allwinner-a10-pic.h | 40 +++ 3 files changed, 259 insertions(+), 0 deletions(-) create mode 100644 hw/intc/allwinner-a10-pic.c create mode 100644 include/hw/intc/allwinner-a10-pic.h diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 47ac442..ec977c4 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -24,3 +24,4 @@ obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o obj-$(CONFIG_SH4) += sh_intc.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_XICS_KVM) += xics_kvm.o +obj-$(CONFIG_ALLWINNER_A10) += allwinner-a10-pic.o diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c new file mode 100644 index 000..fdc9e4e --- /dev/null +++ b/hw/intc/allwinner-a10-pic.c @@ -0,0 +1,218 @@ +/* + * Allwinner A10 interrupt controller device emulation + * + * Copyright (C) 2013 Li Guang + * Written by Li Guang lig.f...@cn.fujitsu.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include hw/sysbus.h +#include hw/devices.h +#include sysemu/sysemu.h +#include hw/intc/allwinner-a10-pic.h + + +static void aw_a10_pic_update(AwA10PICState *s) +{ +uint8_t i, j; +bool irq = false, fiq = false; + +for (i = 0, j = 0; i AW_A10_PIC_REG_NUM; i++) { +if (s-irq_pending[i] == 0 s-fiq_pending[i] == 0) { +continue; +} +for (j = 0; j 32; j++) { +if (test_bit(j, (void *)s-mask[i])) { +continue; +} +if (test_bit(j, (void *)s-irq_pending[i])) { +irq = true; +} +if (test_bit(j, (void *)s-fiq_pending[i]) +test_bit(j, (void *)s-select[i])) { +fiq = true; +} +if (irq fiq) { +goto out; +} +} +} + +out: +qemu_set_irq(s-parent_irq, irq); +qemu_set_irq(s-parent_fiq, fiq); +} + +static void aw_a10_pic_set_irq(void *opaque, int irq, int level) +{ +AwA10PICState *s = opaque; + +if (level) { +set_bit(irq%32, (void *)s-irq_pending[irq/32]); +} +aw_a10_pic_update(s); +} + +static uint64_t aw_a10_pic_read(void *opaque, hwaddr offset, unsigned size) +{ +AwA10PICState *s = opaque; +uint8_t index = (offset 0xc)/4; + +switch (offset) { +case AW_A10_PIC_VECTOR: +return s-vector; +case AW_A10_PIC_BASE_ADDR: +return s-base_addr; +case AW_A10_PIC_PROTECT: +return s-protect; +case AW_A10_PIC_NMI: +return s-nmi; +case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8: +return s-irq_pending[index]; +case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8: +return s-fiq_pending[index]; +case AW_A10_PIC_SELECT ... AW_A10_PIC_SELECT + 8: +return s-select[index]; +case AW_A10_PIC_ENABLE ... AW_A10_PIC_ENABLE + 8: +return s-enable[index]; +case AW_A10_PIC_MASK ... AW_A10_PIC_MASK + 8: +return s-mask[index]; +default: +qemu_log_mask(LOG_GUEST_ERROR, + %s: Bad offset 0x%x\n, __func__, (int)offset); +break; +} + +return 0; +} + +static void aw_a10_pic_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ +AwA10PICState *s = opaque; +uint8_t index = (offset 0xc)/4; + +switch (offset) { +case AW_A10_PIC_VECTOR: +s-vector = value ~0x3; +break; +case AW_A10_PIC_BASE_ADDR: +s-base_addr = value ~0x3; +case AW_A10_PIC_PROTECT: +s-protect = value; +break; +case AW_A10_PIC_NMI: +s-nmi = value; +break; +case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8: +s-irq_pending[index] = ~value; +break; +case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8: +s-fiq_pending[index] = ~value; +break; +case AW_A10_PIC_SELECT ... AW_A10_PIC_SELECT + 8: +s-select[index] = value; +break; +case AW_A10_PIC_ENABLE ... AW_A10_PIC_ENABLE + 8: +s-enable[index] = value; +break; +case AW_A10_PIC_MASK ... AW_A10_PIC_MASK + 8: +s-mask[index] = value; +break; +default: +qemu_log_mask(LOG_GUEST_ERROR, + %s: Bad offset 0x%x\n, __func__, (int)offset); +break; +} + +
[Qemu-devel] [PATCH v8 0/5] add allwinner A10 SoC support
lay a foundation for allwinner A10 SoC with a cortex-a8 processor, and will add more devices later. v2: split timer and interrupt controller emulation into their corresponding files. v3: 1. change loader_start address 2. add 64-bit counter 3. fixup fail to clear interrup status issue v4: 1. add VMSD 2. use defines of magic number for readability 3. code cleanup v5: 1. add VMSTATE_PTIMER_ARRAY 2. code cleanup v6: 1. fix a fiq lost issue pointed out by Peter Crosthwaite 2. code cleanup v7: model allwinner A10 as a SoC device, and add cubieboard. v8: 1. A10 be QOMified as a device 2. add AW as prefix of A10 TODO: 1. add BROM support 2. add more devices test: can boot-up officially released linux kernel build with PLL disabled. reference: http://linux-sunxi.org/Main_Page Li Guang (5) vmstate: add VMSTATE_PTIMER_ARRAY hw/timer: add allwinner a10 timer hw/intc: add allwinner A10 interrupt controller hw/arm: add allwinner a10 SoC support hw/arm: add cubieboard support default-configs/arm-softmmu.mak | 2 + hw/arm/Makefile.objs | 4 +- hw/arm/allwinner-a10.c | 39 +++ hw/arm/cubieboard.c | 33 + hw/intc/Makefile.objs| 1 + hw/intc/allwinner-a10_pic.c | 218 +++ hw/timer/Makefile.objs | 2 + hw/timer/allwinner-a10_pit.c | 253 ++ include/hw/arm/allwinner-a10.h | 27 +++ include/hw/intc/allwinner-a10_pic.h | 40 +++ include/hw/timer/allwinner-a10_pit.h | 57 include/migration/vmstate.h | 4 savevm.c | 31 +++ 13 files changed, 709 insertions(+), 2 deletions(-) create mode 100644 hw/timer/allwinner-a10_pit.c create mode 100644 include/hw/timer/allwinner-a10_pit.h create mode 100644 hw/intc/allwinner-a10_pic.c create mode 100644 include/hw/intc/allwinner-a10_pic.h create mode 100644 hw/arm/allwinner-a10.c create mode 100644 include/hw/arm/allwinner-a10.h create mode 100644 hw/arm/cubieboard.c
Re: [Qemu-devel] gpu and console chicken and egg
On Mi, 2013-12-04 at 17:02 +1000, Dave Airlie wrote: So I've hit a bit of a init ordering issue that I'm not sure how best to solve, Just some background: In order for the virt GPU and the UI layer (SDL or GTK etc) to interact properly over OpenGL use, I have created and OpenGL provider in the console, and the UI layer can register callbacks for a single GL provider (only one makes sense really) when it starts up. This is mainly to be used for context management and swap buffers management. Now in the virtio GPU I'd was going to use a virtio feature to say whether the qemu hw can support the 3D renderer, dependant on whether it was linked with the virgl renderer and whether the current UI was GL capable. Hmm, why does it depend on the UI? Wasn't the plan to render into a dma-buf no matter what? Then either read the rendered result from the dmabuf (non-gl UI like vnc) or let the (gl-capable) UI pass the dma-buf to the compositor? Also note that the virtio-gpu gl-capability needs to be configurable for live migration reasons, so you can migrate between hosts with different 3d capabilities. Something like -device virtio-gpu,gl={none,gl2,gl3,host} where none turns it off, gl $version specifies the gl support level and host makes it depend on the host capabilities (simliar to -cpu host). For starters you can leave out host and depend on the user set this. So is there a method to modify the advertised feature bits later in the setup sequence before the guest is started? You can register a callback to be notified when the guest is started/stopped (qemu_add_vm_change_state_handler). That could be used although it is a bit hackish. cheers, Gerd PS: Now that 1.7 is out of the door and 2.0 tree is open for development we should start getting the bits which are ready merged to make your patch backlog smaller. SDL2 would be a good start I think.
Re: [Qemu-devel] [PATCH v2 0/2]
Your cover letter lacks a proper subject, and your patches lack proper threading. git-send-email threads properly (unless you mess up its configuration).
Re: [Qemu-devel] [PATCH 07/23] hw/pci/pcie_aer.c: fix buffer overruns on invalid state load
On Tue, Dec 03, 2013 at 08:59:52PM +, Peter Maydell wrote: On 3 December 2013 20:41, Michael S. Tsirkin m...@redhat.com wrote: On Tue, Dec 03, 2013 at 06:30:46PM +, Peter Maydell wrote: On 3 December 2013 16:28, Michael S. Tsirkin m...@redhat.com wrote: const VMStateDescription vmstate_pcie_aer_log = { .name = PCIE_AER_ERROR_LOG, .version_id = 1, .minimum_version_id = 1, .minimum_version_id_old = 1, +.post_load = pcie_aer_state_post_load, .fields = (VMStateField[]) { VMSTATE_UINT16(log_num, PCIEAERLog), -VMSTATE_UINT16(log_max, PCIEAERLog), -VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num, +VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog), +VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_max, vmstate_pcie_aer_err, PCIEAERErr), VMSTATE_END_OF_LIST() } Isn't this a migration compability break? How is it a break? If a QEMU with this patch sends data to a QEMU without it, then the receiving end will think it should expect log_num array entries but the sending end is going to send log_max of them. Conversely, an old-new migration is going to send fewer array entries than the destination expects. Or have I misinterpreted how the VARRAY entries work? thanks -- PMM Ah, got it. You are right, good catch. I think we need VMSTATE_UINT16_LE, this will make sure log_num = log_max without changing VMSTATE_STRUCT_VARRAY size.
Re: [Qemu-devel] [PATCH v2 2/2] target-i386: Intel MPX
Il 04/12/2013 08:56, Liu, Jinsong ha scritto: From 256484fd75d4eb4d248e5e0f493f16182da59dc2 Mon Sep 17 00:00:00 2001 From: Liu Jinsong jinsong@intel.com Date: Wed, 4 Dec 2013 16:56:49 +0800 Subject: [PATCH v2 2/2] target-i386: Intel MPX Add some MPX related definiation, and hardcode sizes and offsets of xsave features 3 and 4. It also add corresponding part to kvm_get/put_xsave. Signed-off-by: Liu Jinsong jinsong@intel.com --- target-i386/cpu.c |4 target-i386/cpu.h | 24 +--- target-i386/kvm.c | 10 ++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 544b57f..52ca029 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -336,6 +336,10 @@ typedef struct ExtSaveArea { static const ExtSaveArea ext_save_areas[] = { [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, .offset = 0x240, .size = 0x100 }, +[3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, +.offset = 0x3c0, .size = 0x40 }, +[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, +.offset = 0x400, .size = 0x10 }, }; const char *get_register_name_32(unsigned int reg) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index ea373e8..4020591 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -380,9 +380,12 @@ #define MSR_VM_HSAVE_PA 0xc0010117 -#define XSTATE_FP 1 -#define XSTATE_SSE 2 -#define XSTATE_YMM 4 +#define XSTATE_FP (1ULL 0) +#define XSTATE_SSE (1ULL 1) +#define XSTATE_YMM (1ULL 2) +#define XSTATE_BNDREGS (1ULL 3) +#define XSTATE_BNDCSR (1ULL 4) + /* CPUID feature words */ typedef enum FeatureWord { @@ -545,6 +548,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_ERMS (1 9) #define CPUID_7_0_EBX_INVPCID (1 10) #define CPUID_7_0_EBX_RTM (1 11) +#define CPUID_7_0_EBX_MPX (1 14) #define CPUID_7_0_EBX_RDSEED (1 18) #define CPUID_7_0_EBX_ADX (1 19) #define CPUID_7_0_EBX_SMAP (1 20) @@ -695,6 +699,18 @@ typedef union { uint64_t q; } MMXReg; +typedef struct BNDReg { +uint64_t lb; +uint64_t ub; +} BNDReg; + +typedef struct BNDCSReg { +uint64_t cfg; +uint64_t pad; +uint64_t sts_lo; +uint64_t sts_hi; +} BNDCSReg; + #ifdef HOST_WORDS_BIGENDIAN #define XMM_B(n) _b[15 - (n)] #define XMM_W(n) _w[7 - (n)] @@ -912,6 +928,8 @@ typedef struct CPUX86State { uint64_t xstate_bv; XMMReg ymmh_regs[CPU_NB_REGS]; +BNDReg bnd_regs[4]; +BNDCSReg bndcs_regs; uint64_t xcr0; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 749aa09..347d3d3 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -980,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_XMM_SPACE 40 #define XSAVE_XSTATE_BV 128 #define XSAVE_YMMH_SPACE 144 +#define XSAVE_BNDREGS 240 +#define XSAVE_BNDCSR 256 static int kvm_put_xsave(X86CPU *cpu) { @@ -1012,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu) *(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv; memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs, sizeof env-ymmh_regs); +memcpy(xsave-region[XSAVE_BNDREGS], env-bnd_regs, +sizeof env-bnd_regs); +memcpy(xsave-region[XSAVE_BNDCSR], env-bndcs_regs, +sizeof(env-bndcs_regs)); r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1294,6 +1300,10 @@ static int kvm_get_xsave(X86CPU *cpu) env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV]; memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE], sizeof env-ymmh_regs); +memcpy(env-bnd_regs, xsave-region[XSAVE_BNDREGS], +sizeof env-bnd_regs); +memcpy(env-bndcs_regs, xsave-region[XSAVE_BNDCSR], +sizeof(env-bndcs_regs)); return 0; } Almost there. Migration (vmstate) is still missing. Paolo
Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP
Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: Normally the user is expected to eject DVD if it is not locked by the guest. eject_device() makes few checks and calls bdrv_close() if DVD is not in use. However it is still possible to eject DVD even if it is in use. For that, QEMU sets eject requested flag, the guest reads it, issues ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case, bdrv_close() is not called anywhere so it remains inserted in QEMU's terms. This is expected behavior, and matches what IDE does. Markus, can you confirm? Paolo
Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL
Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: This prevents the guest from preventing DVD medium removal when there is no medium. Without this, if the user has ejected a DVD, it is possible to have a block device with an open tray, no media but locked. I need to check this against a real disk. Thanks! Paolo
[Qemu-devel] [PATCH V7 5/6] qemu-iotests: add test for snapshot in qemu-img convert
Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- tests/qemu-iotests/058 | 19 ++- tests/qemu-iotests/058.out | 12 2 files changed, 30 insertions(+), 1 deletions(-) diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 index cf50857..14584cd 100755 --- a/tests/qemu-iotests/058 +++ b/tests/qemu-iotests/058 @@ -1,6 +1,6 @@ #!/bin/bash # -# Test export internal snapshot by qemu-nbd. +# Test export internal snapshot by qemu-nbd, convert it by qemu-img. # # Copyright (C) 2013 IBM, Inc. # @@ -54,6 +54,8 @@ _wait_for_nbd() exit 1 } +converted_image=$TEST_IMG.converted + _export_nbd_snapshot() { _cleanup_nbd @@ -74,6 +76,7 @@ _cleanup() { _cleanup_nbd _cleanup_test_img +rm -f $converted_image } trap _cleanup; exit \$status 0 1 2 3 15 @@ -115,6 +118,20 @@ echo == verifying the exported snapshot with patterns, method 2 == $QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $nbd_snapshot_img | _filter_qemu_io $QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $nbd_snapshot_img | _filter_qemu_io +$QEMU_IMG convert $TEST_IMG -l sn1 -O qcow2 $converted_image + +echo +echo == verifying the converted snapshot with patterns, method 1 == +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io + +$QEMU_IMG convert $TEST_IMG -l snapshot.name=sn1 -O qcow2 $converted_image + +echo +echo == verifying the converted snapshot with patterns, method 2 == +$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io +$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io + # success, all done echo *** done rm -f $seq.full diff --git a/tests/qemu-iotests/058.out b/tests/qemu-iotests/058.out index 768ac61..9a69379 100644 --- a/tests/qemu-iotests/058.out +++ b/tests/qemu-iotests/058.out @@ -29,4 +29,16 @@ read 4096/4096 bytes at offset 4096 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 4096/4096 bytes at offset 8192 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verifying the converted snapshot with patterns, method 1 == +read 4096/4096 bytes at offset 4096 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 8192 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verifying the converted snapshot with patterns, method 2 == +read 4096/4096 bytes at offset 4096 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 8192 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done -- 1.7.1
[Qemu-devel] [PATCH V7 1/6] snapshot: distinguish id and name in load_tmp
Since later this function will be used so improve it. The only caller of it now is qemu-img, and it is not impacted by introduce function bdrv_snapshot_load_tmp_by_id_or_name() that call bdrv_snapshot_load_tmp() twice to keep old search logic. bdrv_snapshot_load_tmp_by_id_or_name() return int to let caller know the errno, and errno will be used later. Also fix a typo in comments of bdrv_snapshot_delete(). Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- block/qcow2-snapshot.c| 10 ++- block/qcow2.h |5 +++- block/snapshot.c | 59 ++-- include/block/block_int.h |4 ++- include/block/snapshot.h |7 - qemu-img.c|8 - 6 files changed, 83 insertions(+), 10 deletions(-) diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 3529c68..ad8bf3d 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -675,7 +675,10 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) return s-nb_snapshots; } -int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) +int qcow2_snapshot_load_tmp(BlockDriverState *bs, +const char *snapshot_id, +const char *name, +Error **errp) { int i, snapshot_index; BDRVQcowState *s = bs-opaque; @@ -687,8 +690,10 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) assert(bs-read_only); /* Search the snapshot */ -snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name); +snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); if (snapshot_index 0) { +error_setg(errp, + Can't find snapshot); return -ENOENT; } sn = s-snapshots[snapshot_index]; @@ -699,6 +704,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) ret = bdrv_pread(bs-file, sn-l1_table_offset, new_l1_table, new_l1_bytes); if (ret 0) { +error_setg(errp, Failed to read l1 table for snapshot); g_free(new_l1_table); return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index 922e190..303eb26 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -488,7 +488,10 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *name, Error **errp); int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab); -int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name); +int qcow2_snapshot_load_tmp(BlockDriverState *bs, +const char *snapshot_id, +const char *name, +Error **errp); void qcow2_free_snapshots(BlockDriverState *bs); int qcow2_read_snapshots(BlockDriverState *bs); diff --git a/block/snapshot.c b/block/snapshot.c index a05c0c0..565222e 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -194,7 +194,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, * If only @snapshot_id is specified, delete the first one with id * @snapshot_id. * If only @name is specified, delete the first one with name @name. - * if none is specified, return -ENINVAL. + * if none is specified, return -EINVAL. * * Returns: 0 on success, -errno on failure. If @bs is not inserted, return * -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs @@ -265,18 +265,71 @@ int bdrv_snapshot_list(BlockDriverState *bs, return -ENOTSUP; } +/** + * Temporarily load an internal snapshot by @snapshot_id and @name. + * @bs: block device used in the operation + * @snapshot_id: unique snapshot ID, or NULL + * @name: snapshot name, or NULL + * @errp: location to store error + * + * If both @snapshot_id and @name are specified, load the first one with + * id @snapshot_id and name @name. + * If only @snapshot_id is specified, load the first one with id + * @snapshot_id. + * If only @name is specified, load the first one with name @name. + * if none is specified, return -EINVAL. + * + * Returns: 0 on success, -errno on fail. If @bs is not inserted, return + * -ENOMEDIUM. If @bs is not readonly, return -EINVAL. If @bs did not support + * internal snapshot, return -ENOTSUP. If qemu can't find a matching @id and + * @name, return -ENOENT. If @errp != NULL, it will always be filled on + * failure. + */ int bdrv_snapshot_load_tmp(BlockDriverState *bs, -const char *snapshot_name) + const char *snapshot_id, + const char *name, + Error **errp) { BlockDriver *drv = bs-drv; + if (!drv) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; } +if (!snapshot_id !name) { +error_setg(errp, snapshot_id and name are both NULL); +return
[Qemu-devel] [PATCH V7 4/6] qemu-img: add -l for snapshot in convert
Now qemu-img convert have similar options as qemu-nbd for internal snapshot. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- qemu-img-cmds.hx |4 ++-- qemu-img.c | 44 +++- qemu-img.texi| 12 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index da1d965..d029609 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF(convert, img_convert, -convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename) +convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename) STEXI -@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF(info, img_info, diff --git a/qemu-img.c b/qemu-img.c index a73beb5..ee940c2 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -93,6 +93,11 @@ static void help(void) 'options' is a comma separated list of format specific options in a\n name=value format. Use -o ? for an overview of the options supported by the\n used format\n + 'snapshot_param' is param used for internal snapshot, format\n + is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n + '[ID_OR_NAME]'\n + 'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n + instead\n '-c' indicates that target image must be compressed (qcow format only)\n '-u' enables unsafe rebasing. It is assumed that old and new backing file\n match exactly. The image doesn't need a working backing file before\n @@ -1140,6 +1145,7 @@ static int img_convert(int argc, char **argv) int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */ bool quiet = false; Error *local_err = NULL; +QemuOpts *sn_opts = NULL; fmt = NULL; out_fmt = raw; @@ -1148,7 +1154,7 @@ static int img_convert(int argc, char **argv) compress = 0; skip_create = 0; for(;;) { -c = getopt(argc, argv, f:O:B:s:hce6o:pS:t:qn); +c = getopt(argc, argv, f:O:B:s:hce6o:pS:t:qnl:); if (c == -1) { break; } @@ -1183,6 +1189,18 @@ static int img_convert(int argc, char **argv) case 's': snapshot_name = optarg; break; +case 'l': +if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { +sn_opts = qemu_opts_parse(internal_snapshot_opts, optarg, 0); +if (!sn_opts) { +error_report(Failed in parsing snapshot param '%s', + optarg); +return 1; +} +} else { +snapshot_name = optarg; +} +break; case 'S': { int64_t sval; @@ -1254,7 +1272,12 @@ static int img_convert(int argc, char **argv) total_sectors += bs_sectors; } -if (snapshot_name != NULL) { +if (sn_opts) { +ret = bdrv_snapshot_load_tmp(bs[0], + qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), + qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), + local_err); +} else if (snapshot_name != NULL) { if (bs_n 1) { error_report(No support for concatenating multiple snapshot); ret = -1; @@ -1262,13 +1285,13 @@ static int img_convert(int argc, char **argv) } bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, local_err); -if (error_is_set(local_err)) { -error_report(Failed to load snapshot: %s, - error_get_pretty(local_err)); -error_free(local_err); -ret = -1; -goto out; -} +} +if (error_is_set(local_err)) { +error_report(Failed to load snapshot: %s, + error_get_pretty(local_err)); +error_free(local_err); +ret = -1; +goto out; } /* Find driver and parse its options */ @@ -1559,6 +1582,9 @@ out: free_option_parameters(create_options); free_option_parameters(param); qemu_vfree(buf); +if (sn_opts) { +qemu_opts_del(sn_opts); +} if (out_bs) { bdrv_unref(out_bs);
Re: [Qemu-devel] [RFC PATCH v1 0/5] Add error_abort and associated cleanups
Eric Blake ebl...@redhat.com writes: On 12/03/2013 01:33 PM, Igor Mammedov wrote: Also, is it worth adding asserts and/or compiler annotations to require that the Error **err argument of functions be non-NULL, to ensure that callers are always passing either a valid destination or one of the special addresses? But doing so would probably require adding a special address for error_ignore for callers that intend to discard an error in cases where the return type of the function lets them know to proceed with a fallback implementation (that is, cases where ignoring an error makes sense). Right now, we use NULL as ignore errors argument. NULL gives us a chance to express caller must not ignore errors via some non-null annotation that gets fed to a static analyzer. I doubt that would be possible with a special error_ignore object. Anyway, this series is about abort on error. Let's keep ignore errors issues separate. I'm sorry for hijacking thread, but that actually an issue that started an original discussion. Where void returning QOM API functions are used with NULL, without any chance to detect that error happened. So abusing NULL errp in this functions might lead to hard to find runtime errors. I think Eric's suggestion was to enforce passing non NULL errp and let caller to deal with error gracefully so that above mentioned misuse was impossible. Why is ignoring errors from void foo(...) like API considered acceptable? Okay, so it sounds like consensus is that using NULL as the means to ignore errors is okay when there is an alternative way to detect error, but that for any function that returns void, adding an assert(errp) would be appropriate because the caller cannot safely ignore the failure. It's not worth inventing an error_ignore special address, but for functions that have no way to report errors except via errp, then it IS worth enforcing that the caller is either prepared to handle the error or has passed error_abort (or any other special addresses we add later). No objection to asserting that the caller passed an error object when the error object is the only way to signal failure. You can't force your callers to check for failure, but the assertion could help prevent accidental misuse. Assertions fire at run-time, though. Asserting argument not null first thing in the function should enable a sufficiently smart whole-program static checker to flag null arguments. But having such a static check right at compile-time would be much better. Could attribute nonnull do it? If yes, do we still need the assertion?
Re: [Qemu-devel] [PATCH] qemu-img create: add -o nocow option
On Mon, Dec 2, 2013 at 4:54 AM, Chunyan Liu cy...@suse.com wrote: 2013/11/25 Kevin Wolf kw...@redhat.com Am 21.11.2013 um 09:51 hat Stefan Hajnoczi geschrieben: On Thu, Nov 21, 2013 at 11:33:56AM +0800, Chunyan Liu wrote: 2013/11/20 Stefan Hajnoczi stefa...@gmail.com On Wed, Nov 20, 2013 at 04:50:29PM +0800, Chunyan Liu wrote: block/cow.c | 22 ++ block/qcow.c | 22 ++ block/qcow2.c | 22 ++ I think you can avoid modifying all the image formats: .bdrv_create() functions pass options to bdrv_create_file(). Therefore an image format like qcow2 does not need to parse the nocow option itself. Only raw-posix.c:.bdrv_create() needs to know about the nocow option. In existing code, options passed to bdrv_create_file contains no option in fact. And if we pass all options to bdrv_create_file directly, raw-posix.c: .bdrv_create() will get NOCOW option but at the same time get SIZE option, it will create a file with total size. For cow/qcow/qcow2, I suppose it's not expected? In current code, bdrv_create_file will create a zero-sized image for cow/qcow/qcow2. I see what the problem is: the loop that parses options does options++. So by the time it has processed them the options pointer will be NULL or point to a terminator option (option-name == NULL). I was confused because there has been a patch series which changes .bdrv_create() option parsing. But I forgot it hasn't been merged. https://lists.gnu.org/archive/html/qemu-devel/2013-08/msg01695.html Kevin: Have you looked at .bdrv_create() QEMUOptionParameter removal recently? Otherwise I'm inclined to merge Chunyan's patch - we can always refactor the code later when QEMUOptionParameter is removed. No, I haven't. I think the QemuOpts conversion series was relatively close, but it seems nobody is working on it any more. Someone should probably pick it up and finish it. Hi, Kevin Stefan, About the nocow option, according to current status, how should we proceed? Any further work needs to do? We talked on IRC about rebasing the QEMUOptionParameter series. Let's get the in so the 'nocow' change can be made cleanly. Stefan
[Qemu-devel] [PATCH V7 0/6] export internal snapshot by qemu-nbd
This series allow user to read internal snapshot's contents without qemu-img convert. V2: Address Stefan's comments: 02: add 'fall through' comments in the case statement. 03: add doc about the difference of internal snapshot and backing chain snapshot, which is used in previous '--snapshot' parameter. Other: 01,04: rebased on upstream with conflict resolved. v3: Address Paolo's comments: 02: add parameter -l snapshot_id_or_name, rename options snapshot-load to load-snapshot, use QemuOpts. 03: rename snapshot-load to load-snapshot. 04: related change to test both -l and -L case. 05-07: add similar parameter for qemu-img convert. Other: 01: foldered original snapshot logic into function bdrv_snapshot_load_tmp_by_id_or_name(), since multiple caller present in this version. Refined error message from , reason: %s to : %s. 02: Refined error message from , reason: %s to : %s. 03: Rename PARAM to SNAPSHOT_PARAM. v4: Address Eric's comments: 01: typo fix. 02: squashed patch. Use only one option -l and distiguish the cases by the starting string. 03: remove 'eval', remove '_supported_os Linux', remove the comments that have typo. 04: squashed patch. Add only one option -l and distiguish the cases by the starting string. 05: remove indentation. Address Paolo's comments: 02: Use only one option -l and distiguish the cases by the starting string. 04: Use only one option -l and distiguish the cases by the starting string, mark old -s as deprecated, added doc for them. v5: Address Jeff's comments: Quote image name in test cases. v6: Address Kevin's comments: 1: typo fix, remove device and snapshot info in error message. 2: use strstart(). 3: use _require_command(), limit proto to file, since when proto=nbd it can't work. also changed _require_command() to tip better. 4: use strstart(). 6: new patch, since I found the doc missing in debugging. v7: Address Stefan's comments: 3: use unix socket and wait for 30s, to make the case reliable, folder 4: add missing doc change. Other: 3,5: add label method 1/method 2 in echo message, to tip better. Wenchao Xia (6): 1 snapshot: distinguish id and name in load_tmp 2 qemu-nbd: support internal snapshot export 3 qemu-iotests: add 058 internal snapshot export with qemu-nbd case 4 qemu-img: add -l for snapshot in convert 5 qemu-iotests: add test for snapshot in qemu-img convert 6 qemu-nbd: add doc for option -f block/qcow2-snapshot.c | 10 +++- block/qcow2.h|5 +- block/snapshot.c | 77 ++- include/block/block_int.h|4 +- include/block/snapshot.h | 15 - qemu-img-cmds.hx |4 +- qemu-img.c | 44 +++-- qemu-img.texi| 12 +++- qemu-nbd.c | 47 ++- qemu-nbd.texi| 10 +++- tests/qemu-iotests/058 | 138 ++ tests/qemu-iotests/058.out | 44 + tests/qemu-iotests/check |1 + tests/qemu-iotests/common.rc |3 +- tests/qemu-iotests/group |1 + 15 files changed, 390 insertions(+), 25 deletions(-) create mode 100755 tests/qemu-iotests/058 create mode 100644 tests/qemu-iotests/058.out
[Qemu-devel] [PATCH V7 2/6] qemu-nbd: support internal snapshot export
Now it is possible to directly export an internal snapshot, which can be used to probe the snapshot's contents without qemu-img convert. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- block/snapshot.c | 18 ++ include/block/snapshot.h |8 qemu-nbd.c | 46 -- qemu-nbd.texi|8 +++- 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index 565222e..9047f8d 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -25,6 +25,24 @@ #include block/snapshot.h #include block/block_int.h +QemuOptsList internal_snapshot_opts = { +.name = snapshot, +.head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head), +.desc = { +{ +.name = SNAPSHOT_OPT_ID, +.type = QEMU_OPT_STRING, +.help = snapshot id +},{ +.name = SNAPSHOT_OPT_NAME, +.type = QEMU_OPT_STRING, +.help = snapshot name +},{ +/* end of list */ +} +}, +}; + int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, const char *name) { diff --git a/include/block/snapshot.h b/include/block/snapshot.h index d05bea7..770d9bb 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -27,6 +27,14 @@ #include qemu-common.h #include qapi/error.h +#include qemu/option.h + + +#define SNAPSHOT_OPT_BASE snapshot. +#define SNAPSHOT_OPT_ID snapshot.id +#define SNAPSHOT_OPT_NAME snapshot.name + +extern QemuOptsList internal_snapshot_opts; typedef struct QEMUSnapshotInfo { char id_str[128]; /* unique snapshot id */ diff --git a/qemu-nbd.c b/qemu-nbd.c index c26c98e..fe6053c 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -20,6 +20,7 @@ #include block/block.h #include block/nbd.h #include qemu/main-loop.h +#include block/snapshot.h #include stdarg.h #include stdio.h @@ -79,7 +80,14 @@ static void usage(const char *name) \n Block device options:\n -r, --read-only export read-only\n - -s, --snapshot use snapshot file\n + -s, --snapshot use FILE as an external snapshot, create a temporary\n + file with backing_file=FILE, redirect the write to\n + the temporary one\n + -l, --load-snapshot=SNAPSHOT_PARAM\n + load an internal snapshot inside FILE and export it\n + as an read-only device, SNAPSHOT_PARAM format is\n + 'snapshot.id=[ID],snapshot.name=[NAME]', or\n + '[ID_OR_NAME]'\n -n, --nocachedisable host cache\n --cache=MODE set cache mode (none, writeback, ...)\n #ifdef CONFIG_LINUX_AIO @@ -315,7 +323,9 @@ int main(int argc, char **argv) char *device = NULL; int port = NBD_DEFAULT_PORT; off_t fd_size; -const char *sopt = hVb:o:p:rsnP:c:dvk:e:f:t; +QemuOpts *sn_opts = NULL; +const char *sn_id_or_name = NULL; +const char *sopt = hVb:o:p:rsnP:c:dvk:e:f:tl:; struct option lopt[] = { { help, 0, NULL, 'h' }, { version, 0, NULL, 'V' }, @@ -328,6 +338,7 @@ int main(int argc, char **argv) { connect, 1, NULL, 'c' }, { disconnect, 0, NULL, 'd' }, { snapshot, 0, NULL, 's' }, +{ load-snapshot, 1, NULL, 'l' }, { nocache, 0, NULL, 'n' }, { cache, 1, NULL, QEMU_NBD_OPT_CACHE }, #ifdef CONFIG_LINUX_AIO @@ -428,6 +439,17 @@ int main(int argc, char **argv) errx(EXIT_FAILURE, Offset must be positive `%s', optarg); } break; +case 'l': +if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { +sn_opts = qemu_opts_parse(internal_snapshot_opts, optarg, 0); +if (!sn_opts) { +errx(EXIT_FAILURE, Failed in parsing snapshot param `%s', + optarg); +} +} else { +sn_id_or_name = optarg; +} +/* fall through */ case 'r': nbdflags |= NBD_FLAG_READ_ONLY; flags = ~BDRV_O_RDWR; @@ -581,6 +603,22 @@ int main(int argc, char **argv) error_get_pretty(local_err)); } +if (sn_opts) { +ret = bdrv_snapshot_load_tmp(bs, + qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), + qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), + local_err); +} else if (sn_id_or_name) { +ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name, + local_err); +} +if (ret 0) { +errno = -ret; +err(EXIT_FAILURE, +Failed to load snapshot: %s, +error_get_pretty(local_err)); +} + fd_size = bdrv_getlength(bs);
Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP
Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: Normally the user is expected to eject DVD if it is not locked by the guest. eject_device() makes few checks and calls bdrv_close() if DVD is not in use. However it is still possible to eject DVD even if it is in use. For that, QEMU sets eject requested flag, the guest reads it, issues ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case, bdrv_close() is not called anywhere so it remains inserted in QEMU's terms. This is expected behavior, and matches what IDE does. Markus, can you confirm? Confirmed. See commit 4be9762. Alexey, monitor commands eject does two things: it first opens the tray, and if that works, it removes the medium. If the tray is locked closed, it tells the device model that eject was requested. Works just like the physical eject button. With -f, it then rips out the medium. This is similar to opening the tray with a unbent paperclip. Let's ignore this case. The scsi-cd device model tells the guest about the eject request. A well-behaved guest will then command the device to unlock and open the tray. The guest uses the same commands on behalf of its applications, e.g. /usr/bin/eject. Your patch changes behavior of eject /dev/sr0 eject -t /dev/sr0: you no longer get the same medium back. You normally do with real hardware. The somewhat unfortunate consequence is that monitor command eject can only remove the medium when the tray is not locked.
Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL
Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: This prevents the guest from preventing DVD medium removal when there is no medium. Without this, if the user has ejected a DVD, it is possible to have a block device with an open tray, no media but locked. I need to check this against a real disk. Thanks! If I remember correctly, I did, and it works. You can still close the open tray in this state.
[Qemu-devel] [PATCH V7 6/6] qemu-nbd: add doc for option -f
Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- qemu-nbd.c|1 + qemu-nbd.texi |2 ++ 2 files changed, 3 insertions(+), 0 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index fe6053c..136e8c9 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -79,6 +79,7 @@ static void usage(const char *name) #endif \n Block device options:\n + -f, --format=FORMAT set image format (raw, qcow2, ...)\n -r, --read-only export read-only\n -s, --snapshot use FILE as an external snapshot, create a temporary\n file with backing_file=FILE, redirect the write to\n diff --git a/qemu-nbd.texi b/qemu-nbd.texi index 5b55f76..0a7e013 100644 --- a/qemu-nbd.texi +++ b/qemu-nbd.texi @@ -22,6 +22,8 @@ Export QEMU disk image using NBD protocol. interface to bind to (default @samp{0.0.0.0}) @item -k, --socket=@var{path} Use a unix socket with path @var{path} +@item -f, --format=@var{format} + Set image format as @var{format} @item -r, --read-only export read-only @item -P, --partition=@var{num} -- 1.7.1
Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default
On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote: Developers will only be happy with seccomp if it's easy and rewarding to support/debug. Agreed. As a developer, how do you feel about the audit/syslog based approach I mentioned earlier? I used the commands you posted (I think that's what you mean). They produce useful output. The problem is that without an error message on stderr or from the shell, no one will think QEMU process dead and hung == check seccomp immediately. It's frustrating to deal with a silent failure. Stefan
Re: [Qemu-devel] [RFC PATCH 03/21] target-arm: adjust TTBCR for TrustZone feature
On 12/03/2013 04:15 PM, Peter Crosthwaite wrote: On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote: TTBCR has additional fields PD0 and PD1 when using Short-descriptor translation table format on a CPU with TrustZone feature support. Signed-off-by: Sergey Fedorov s.fedo...@samsung.com --- target-arm/helper.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index a247ca0..6642e53 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1159,8 +1159,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, { int maskshift = extract32(value, 0, 3); -if (arm_feature(env, ARM_FEATURE_LPAE)) { +if (arm_feature(env, ARM_FEATURE_LPAE) (value (1 31))) { This appears to be changing more than just trustzone dependent behavior. That is, if we take just this hunk and ignore the one below you see a change in the non-tz behaviour. Is the hunk legitimate irrespective of trustzone support? Yes, current implementation is not accurate according to ARMv7-AR reference manual. See B4.1.153 TTBCR, Translation Table Base Control Register, VMSA | TTBCR format when using the Long-descriptor translation table format. When LPAE feature is supported, EAE, bit[31] selects translation descriptor format and, therefore, TTBCR format. value = ~((7 19) | (3 14) | (0xf 3)); +} else if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) { +value = 0x37; } else { value = 7; } There are a few magic numbers in the patch probably worth macrofiying. As I can see, magic numbers are widely used through all of this file to represent CP register fields and other things. Maybe the macrofying should be done separately from this patch series? Regards, Peter -- 1.7.9.5 Best regards, Sergey Fedorov
Re: [Qemu-devel] [RFC PATCH 04/21] target-arm: preserve RAO/WI bits of ARMv7 SCTLR
On 12/03/2013 04:17 PM, Peter Crosthwaite wrote: On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote: From: Svetlana Fedoseeva s.fedose...@samsung.com Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com Signed-off-by: Sergey Fedorov s.fedo...@samsung.com --- target-arm/helper.c |4 1 file changed, 4 insertions(+) diff --git a/target-arm/helper.c b/target-arm/helper.c index 6642e53..d7922ad 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1507,6 +1507,10 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +if (arm_feature(env, ARM_FEATURE_V7)) { +value = value | 0x00c50078; /* This bits are RAO/WI */ Magic number. these bits . Would be acceptable to substitute this magic number with bitshifted constants combined with bitwise or, e.g. as in vmsa_ttbcr_raw_write()? +} + env-cp15.c1_sys = value; /* ??? Lots of these bits are not implemented. */ /* This may enable/disable the MMU, so do a TLB flush. */ -- 1.7.9.5 -- Best regards, Sergey Fedorov, Junior Software Engineer, Samsung RD Institute Rus. E-mail: s.fedo...@samsung.com
Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode
On 12/03/2013 04:51 PM, Peter Maydell wrote: On 3 December 2013 12:20, Peter Crosthwaite peter.crosthwa...@xilinx.com wrote: On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote: From: Svetlana Fedoseeva s.fedose...@samsung.com Define CPU monitor mode. Adjust core registers banking. Adjust CPU VM state info. Provide CPU mode name for monitor mode. Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com Signed-off-by: Sergey Fedorov s.fedo...@samsung.com --- target-arm/cpu.h |7 --- target-arm/helper.c|3 +++ target-arm/machine.c | 12 ++-- target-arm/translate.c |2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 0b93e39..94d8bd1 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -124,9 +124,9 @@ typedef struct CPUARMState { uint32_t spsr; /* Banked registers. */ -uint32_t banked_spsr[6]; -uint32_t banked_r13[6]; -uint32_t banked_r14[6]; +uint32_t banked_spsr[7]; +uint32_t banked_r13[7]; +uint32_t banked_r14[7]; Are there any more modes yet to be implemented? It might save on future VMSD version bumps if we just pad this out to its ultimate value now. The remaining mode defined for AArch32 which we don't implement yet is Hyp mode, which has a banked R13 and SPSR, but not a banked LR. -- PMM So should a number of banked core registers be increased more? Personally, I'd like to keep this patch only TZ-related. Best regards, Sergey Fedorov
[Qemu-devel] [PATCH v2] qcow2: Zero-initialise first cluster for new images
Strictly speaking, this is only required for has_zero_init() == false, but it's easy enough to just do a cluster-aligned write that is padded with zeros after the header. This fixes that after 'qemu-img create' header extensions are attempted to be parsed that are really just random leftover data. Cc: qemu-sta...@nongnu.org Signed-off-by: Kevin Wolf kw...@redhat.com Reviewed-by: Fam Zheng f...@redhat.com Reviewed-by: Paolo Bonzini pbonz...@redhat.com --- block/qcow2.c | 36 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 6e5d98d..a205f31 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1471,7 +1471,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * size for any qcow2 image. */ BlockDriverState* bs; -QCowHeader header; +QCowHeader *header; uint8_t* refcount_table; Error *local_err = NULL; int ret; @@ -1489,30 +1489,34 @@ static int qcow2_create2(const char *filename, int64_t total_size, } /* Write the header */ -memset(header, 0, sizeof(header)); -header.magic = cpu_to_be32(QCOW_MAGIC); -header.version = cpu_to_be32(version); -header.cluster_bits = cpu_to_be32(cluster_bits); -header.size = cpu_to_be64(0); -header.l1_table_offset = cpu_to_be64(0); -header.l1_size = cpu_to_be32(0); -header.refcount_table_offset = cpu_to_be64(cluster_size); -header.refcount_table_clusters = cpu_to_be32(1); -header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT); -header.header_length = cpu_to_be32(sizeof(header)); +QEMU_BUILD_BUG_ON((1 MIN_CLUSTER_BITS) sizeof(*header)); +header = g_malloc0(cluster_size); +*header = (QCowHeader) { +.magic = cpu_to_be32(QCOW_MAGIC), +.version= cpu_to_be32(version), +.cluster_bits = cpu_to_be32(cluster_bits), +.size = cpu_to_be64(0), +.l1_table_offset= cpu_to_be64(0), +.l1_size= cpu_to_be32(0), +.refcount_table_offset = cpu_to_be64(cluster_size), +.refcount_table_clusters= cpu_to_be32(1), +.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT), +.header_length = cpu_to_be32(sizeof(*header)), +}; if (flags BLOCK_FLAG_ENCRYPT) { -header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); +header-crypt_method = cpu_to_be32(QCOW_CRYPT_AES); } else { -header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); +header-crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); } if (flags BLOCK_FLAG_LAZY_REFCOUNTS) { -header.compatible_features |= +header-compatible_features |= cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); } -ret = bdrv_pwrite(bs, 0, header, sizeof(header)); +ret = bdrv_pwrite(bs, 0, header, cluster_size); +g_free(header); if (ret 0) { error_setg_errno(errp, -ret, Could not write qcow2 header); goto out; -- 1.8.1.4
Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support
On 12/03/2013 12:48 PM, Sergey Fedorov wrote: This patch set implements a basic support of CPU core TrustZone feature. The following major functionalities are implemented: * CPU monitor mode * Separate code translation for each secure state * CPACR NSACR co-processor access control * Separate TLB for each secure state * Co-processor register banking * SMC instruction * FIQ/IRQ routing to monitor mode There is no support for banked co-processor register migration, save/load its VM state yet. That is an open question how to implement this functionality. Any suggestions is greatly appreciated. This patch set is a request for comments for the proof of concept. Sergey Fedorov (18): target-arm: move SCR VBAR into TrustZone register list target-arm: adjust TTBCR for TrustZone feature target-arm: add arm_is_secure() helper target-arm: reject switching to monitor mode from non-secure state target-arm: adjust arm_current_pl() for TrustZone target-arm: adjust SCR CP15 register access rights target-arm: add non-secure Translation Block flag target-arm: implement CPACR register logic target-arm: add NSACR support target-arm: add SDER definition target-arm: split TLB for secure state target-arm: add banked coprocessor register type target-arm: convert appropriate coprocessor registers to banked type target-arm: use c13_context field for CONTEXTIDR target-arm: switch banked CP registers target-arm: add MVBAR support target-arm: implement SMC instruction target-arm: implement IRQ/FIQ routing to Monitor mode Svetlana Fedoseeva (3): target-arm: add TrustZone CPU feature target-arm: preserve RAO/WI bits of ARMv7 SCTLR target-arm: add CPU Monitor mode target-arm/cpu.c |6 +- target-arm/cpu.h | 126 target-arm/helper.c| 308 +- target-arm/machine.c | 12 +- target-arm/translate.c | 388 ++-- target-arm/translate.h |2 + 6 files changed, 585 insertions(+), 257 deletions(-) We'd like this patch series finally to be merged into mainstream. What should be done to achieve this goal? Best regards, Sergey Fedorov
Re: [Qemu-devel] [PATCH v3] block: Close backing file early in bdrv_img_create
On Tue, Dec 03, 2013 at 02:57:52PM +0100, Max Reitz wrote: Leaving the backing file open although it is not needed anymore can cause problems if it is opened through a block driver which allows exclusive access only and if the create function of the block driver used for the top image (the one being created) tries to close and reopen the image file (which will include opening the backing file a second time). In particular, this will happen with a backing file opened through qemu-nbd and using qcow2 as the top image file format (which reopens the image to flush it to disk). In addition, the BlockDriverState in bdrv_img_create() is used for the backing file only; it should therefore be made local to the respective block. Signed-off-by: Max Reitz mre...@redhat.com --- v3: - Reverted the revert in v2 and limited the BlockDriverState once again to the scope its used in (addressing Xia's comment); added a missing bdrv_unref() in contrast to v1 (addressing Kevin's comment to v1) v2: - Minimizing the changes prevents introducing a leak of the BlockDriverState in case of an error in bdrv_open() (thanks, Kevin). --- block.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
Re: [Qemu-devel] [Bug 1257334] [NEW] diffuse handling of image creation from another path
On Tue, Dec 03, 2013 at 03:22:28PM -, Florian Miksch wrote: Hi, This does not look like a bug. Please see the explanation below... Inlining Florian's reproducer script so we can discuss it via email: #!/bin/bash # This script does create a backing image and overlay to # reproduce the reported bug. #... mkdir a # Create an image from another path e.g. in the directory 'a' qemu-img create -f qcow2 a/blob.img 10G # Create an overlay image from another path in the same directory qemu-img create -f qcow2 -b a/blob.img a/ovl.img # Get Info in the new directory cd a qemu-img info ovl.img # Output: # image: ovl.img # file format: qcow2 # virtual size: 10G (10737418240 bytes) # disk size: 196K # cluster_size: 65536 # backing file: a/blob.img # Get the Info from another directory cd .. qemu-img info a/ovl.img # Output: # image: a/ovl.img # file format: qcow2 # virtual size: 10G (10737418240 bytes) # disk size: 196K # cluster_size: 65536 # backing file: a/blob.img (actual path: a/a/blob.img) # Bug: # Compare 'image' # Compare 'backing file' # Look at 'actual path' The behavior you are showing here is explained as follows: Backing file paths can be relative or absolute and are stored inside the image file. In this case you are providing a relative backing file path. Relative backing file paths are interpreted against the image filename. In other words: join_path(dirname('a/ovl.img'), 'a/blob.img') - 'a/a/blob.img' # 'qemu-img info' takes the recommended path as name of the image # and the actual path is then: a/a/blob.img qemu-img commit a/ovl.img # Now commit fails # -- ERROR No such file or directory The problem was: qemu-img create -f qcow2 -b a/blob.img a/ovl.img If you want to use a relative backing file path, remember that it is relative to the image file: cd a qemu-img create -f qcow2 -b blob.img ovl.img Then 'qemu-img commit a/ovl.img' will work as expected. Stefan
Re: [Qemu-devel] [PATCH v2] qcow2: Zero-initialise first cluster for new images
On Wed, Dec 04, 2013 at 11:06:36AM +0100, Kevin Wolf wrote: Strictly speaking, this is only required for has_zero_init() == false, but it's easy enough to just do a cluster-aligned write that is padded with zeros after the header. This fixes that after 'qemu-img create' header extensions are attempted to be parsed that are really just random leftover data. Cc: qemu-sta...@nongnu.org Signed-off-by: Kevin Wolf kw...@redhat.com Reviewed-by: Fam Zheng f...@redhat.com Reviewed-by: Paolo Bonzini pbonz...@redhat.com --- block/qcow2.c | 36 1 file changed, 20 insertions(+), 16 deletions(-) Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
Re: [Qemu-devel] [RFC PATCH 03/21] target-arm: adjust TTBCR for TrustZone feature
On Wed, Dec 4, 2013 at 7:50 PM, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/03/2013 04:15 PM, Peter Crosthwaite wrote: On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote: TTBCR has additional fields PD0 and PD1 when using Short-descriptor translation table format on a CPU with TrustZone feature support. Signed-off-by: Sergey Fedorov s.fedo...@samsung.com --- target-arm/helper.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index a247ca0..6642e53 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1159,8 +1159,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, { int maskshift = extract32(value, 0, 3); -if (arm_feature(env, ARM_FEATURE_LPAE)) { +if (arm_feature(env, ARM_FEATURE_LPAE) (value (1 31))) { This appears to be changing more than just trustzone dependent behavior. That is, if we take just this hunk and ignore the one below you see a change in the non-tz behaviour. Is the hunk legitimate irrespective of trustzone support? Yes, current implementation is not accurate according to ARMv7-AR reference manual. See B4.1.153 TTBCR, Translation Table Base Control Register, VMSA | TTBCR format when using the Long-descriptor translation table format. When LPAE feature is supported, EAE, bit[31] selects translation descriptor format and, therefore, TTBCR format. Ok, You should probably split it off as a separate patch. And you could send probably send it immediately. What you just wrote is a nice commit message. value = ~((7 19) | (3 14) | (0xf 3)); +} else if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) { +value = 0x37; } else { value = 7; } There are a few magic numbers in the patch probably worth macrofiying. As I can see, magic numbers are widely used through all of this file to represent CP register fields and other things. Maybe the macrofying should be done separately from this patch series? So target-arm is riddled with legacy style. Converting all of target-arm to self-doccing macros is a big job, but if we keep expanding without doing it that job only gets bigger. I'll defer to PMM on what we want to policy to be going forward. - any thoughts? Regards, Peter Regards, Peter -- 1.7.9.5 Best regards, Sergey Fedorov
Re: [Qemu-devel] [PATCH 00/23] qemu state loading issues
On Tue, Dec 03, 2013 at 06:24:24PM +, Peter Maydell wrote: On 3 December 2013 16:28, Michael S. Tsirkin m...@redhat.com wrote: For example: https://bugzilla.redhat.com/show_bug.cgi?id=588133#c8 https://bugzilla.redhat.com/show_bug.cgi?id=588133#c9 I get not authorized errors on both of those. Oh, sorry :( You'll have to take my word on it that what happened there was that a bug reporter saved state and suggested that developer attempt to load it to reproduce the bug. This patchset is the result of that audit: it addresses this set of security issues by adding input validation and failing migration on invalid input. I notice that some but not all of these patches have CVE numbers in the commit messages -- what's the distinction that meant some of them get CVEs and some don't? thanks -- PMM The one that does not have a CVE is 23/23, this is a NULL pointer dereference on invalid input, which is not nice but probably not exploitable. -- MST
Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode
On Wed, Dec 4, 2013 at 8:01 PM, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/03/2013 04:51 PM, Peter Maydell wrote: On 3 December 2013 12:20, Peter Crosthwaite peter.crosthwa...@xilinx.com wrote: On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote: From: Svetlana Fedoseeva s.fedose...@samsung.com Define CPU monitor mode. Adjust core registers banking. Adjust CPU VM state info. Provide CPU mode name for monitor mode. Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com Signed-off-by: Sergey Fedorov s.fedo...@samsung.com --- target-arm/cpu.h |7 --- target-arm/helper.c|3 +++ target-arm/machine.c | 12 ++-- target-arm/translate.c |2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 0b93e39..94d8bd1 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -124,9 +124,9 @@ typedef struct CPUARMState { uint32_t spsr; /* Banked registers. */ -uint32_t banked_spsr[6]; -uint32_t banked_r13[6]; -uint32_t banked_r14[6]; +uint32_t banked_spsr[7]; +uint32_t banked_r13[7]; +uint32_t banked_r14[7]; Are there any more modes yet to be implemented? It might save on future VMSD version bumps if we just pad this out to its ultimate value now. The remaining mode defined for AArch32 which we don't implement yet is Hyp mode, which has a banked R13 and SPSR, but not a banked LR. -- PMM So should a number of banked core registers be increased more? Personally, I'd like to keep this patch only TZ-related. So what im proposing is just a slightly more general patch. Is it really any more complicated than just applying your change pattern for the hyp mode? Patch subject would be something like: target-arm: Add all remaining missing modes The motiviation is less VMSD version bumps in ARM CPU (a place where I expect assume such version bumps to be considerable annoyance). Regards, Peter Best regards, Sergey Fedorov
Re: [Qemu-devel] [PATCH 12/12] target-arm: A64: add support for compare and branch imm
On 4 December 2013 00:48, Richard Henderson r...@twiddle.net wrote: On 12/04/2013 01:32 PM, Peter Maydell wrote: You're right that we can just make this function return the TCGv temp rather than making the caller pass one in. Are you suggesting the 64-bit case should return cpu_X[reg] rather than a copy of it, though? I think it would be pretty hard to reason about if you had to remember that sometimes you got a trashable copy and sometimes the real register. I think that the normal case is to write to the output in one step, and thus having an input overlap an output isn't your problem but tcg's. I would think the case of multi-step output to be fairly rare, and easily solvable by always allocating an output temporary. Does a copy to temp actually cost us anything, though? I was under the impression the TCG optimizer would basically throw it away again. I suppose you could just treat the semantics of it as assume you can't trash this TCGv in all cases and rely on the auto-free. Am I off-base on the multi-output thing? Modulo flags setting, of course, but I think that special case ought to be relatively solvable. (I'm still a bit on the fence about that pool of auto-freeing temporaries. Manual temp management is certainly a fertile source of decoder bugs, but in the longer term we might want to push the functionality down into the common code rather than having an ad-hoc thing in one decoder.) See also Sparc and S390. ;-) Ah, I hadn't noticed we were already doing this in other frontends. -- PMM
Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support
Hi Sergey, On Wed, Dec 4, 2013 at 8:08 PM, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/03/2013 12:48 PM, Sergey Fedorov wrote: This patch set implements a basic support of CPU core TrustZone feature. The following major functionalities are implemented: * CPU monitor mode * Separate code translation for each secure state * CPACR NSACR co-processor access control * Separate TLB for each secure state * Co-processor register banking * SMC instruction * FIQ/IRQ routing to monitor mode There is no support for banked co-processor register migration, save/load its VM state yet. That is an open question how to implement this functionality. Any suggestions is greatly appreciated. This patch set is a request for comments for the proof of concept. Sergey Fedorov (18): target-arm: move SCR VBAR into TrustZone register list target-arm: adjust TTBCR for TrustZone feature target-arm: add arm_is_secure() helper target-arm: reject switching to monitor mode from non-secure state target-arm: adjust arm_current_pl() for TrustZone target-arm: adjust SCR CP15 register access rights target-arm: add non-secure Translation Block flag target-arm: implement CPACR register logic target-arm: add NSACR support target-arm: add SDER definition target-arm: split TLB for secure state target-arm: add banked coprocessor register type target-arm: convert appropriate coprocessor registers to banked type target-arm: use c13_context field for CONTEXTIDR target-arm: switch banked CP registers target-arm: add MVBAR support target-arm: implement SMC instruction target-arm: implement IRQ/FIQ routing to Monitor mode Svetlana Fedoseeva (3): target-arm: add TrustZone CPU feature target-arm: preserve RAO/WI bits of ARMv7 SCTLR target-arm: add CPU Monitor mode target-arm/cpu.c |6 +- target-arm/cpu.h | 126 target-arm/helper.c| 308 +- target-arm/machine.c | 12 +- target-arm/translate.c | 388 ++-- target-arm/translate.h |2 + 6 files changed, 585 insertions(+), 257 deletions(-) We'd like this patch series finally to be merged into mainstream. What should be done to achieve this goal? Send patch series' just like this and either action or challenge reviewer comments (as you have already done). Next time, you can drop the RFC, that means you intend the series for review+merge. If you get no reply after a week ping the series. I'm about half way through the series on my first pass. And I guess PMM has had a look as he has commented on one my comments. A 21 patch series with +500/-250 is long, and queue maintainers are generally hesitant to take partial series (as its often contrary to author intention). Things generally go faster with smaller series. You have some low-complexity cleanup type patches that dont add new features than you could potentially send as a smaller initial series to get upstream quickly, then take more time on the harder stuff as follow up series with reduced patch count. Regards, Peter Best regards, Sergey Fedorov
Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support
On 4 December 2013 10:08, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/03/2013 12:48 PM, Sergey Fedorov wrote: This patch set implements a basic support of CPU core TrustZone feature. We'd like this patch series finally to be merged into mainstream. What should be done to achieve this goal? I'd like to see TZ support in mainline too. The high level answer is that it needs to get code reviewed, and you need to fix issues that are raised in code review. Unfortunately my review queue is currently pretty full (it has Allwinner board support, DIGIC board support, a bunch of Cadence fixes, ARMv8 32 bit new instructions and the A64 64 bit instruction support in it, all of which are fairly big patchsets), so it may take me a little while to get to this patchset. It is on my todo list though, so it won't get forgotten :-) thanks -- PMM
Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode
On 4 December 2013 10:58, Peter Crosthwaite peter.crosthwa...@xilinx.com wrote: So what im proposing is just a slightly more general patch. Is it really any more complicated than just applying your change pattern for the hyp mode? I think it would be, because of the wrinkle that hyp mode doesn't have a banked LR, so the existing assume we can just translate the mode into a single index good for both LR and SP logic would break. The minimal change if we wanted to keep VMSD bumps to a minimum would be to increase the size of the banked_spsr[] and banked_r13[] arrays to allow for Hyp mode but do nothing else (except add a comment about it I guess). The motiviation is less VMSD version bumps in ARM CPU (a place where I expect assume such version bumps to be considerable annoyance). Well, they're only a problem at the point where we start trying to support cross-version migration; we're not at that point yet... thanks -- PMM
Re: [Qemu-devel] [PATCH v2 2/2] target-i386: Intel MPX
Almost there. Migration (vmstate) is still missing. Like this: == From faead85c0dbe62da896e0ed9e165d98e10216968 Mon Sep 17 00:00:00 2001 From: Liu Jinsong jinsong@intel.com Date: Wed, 4 Dec 2013 16:56:49 +0800 Subject: [PATCH 2/2] target-i386: Intel MPX Add some MPX related definiation, and hardcode sizes and offsets of xsave features 3 and 4. It also add corresponding part to kvm_get/put_xsave, and vmstate. Signed-off-by: Liu Jinsong jinsong@intel.com --- target-i386/cpu.c |4 target-i386/cpu.h | 22 +++--- target-i386/kvm.c | 10 ++ target-i386/machine.c | 32 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 544b57f..52ca029 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -336,6 +336,10 @@ typedef struct ExtSaveArea { static const ExtSaveArea ext_save_areas[] = { [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, .offset = 0x240, .size = 0x100 }, +[3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, +.offset = 0x3c0, .size = 0x40 }, +[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, +.offset = 0x400, .size = 0x10 }, }; const char *get_register_name_32(unsigned int reg) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index ea373e8..5c1dd17 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -380,9 +380,12 @@ #define MSR_VM_HSAVE_PA 0xc0010117 -#define XSTATE_FP 1 -#define XSTATE_SSE 2 -#define XSTATE_YMM 4 +#define XSTATE_FP (1ULL 0) +#define XSTATE_SSE (1ULL 1) +#define XSTATE_YMM (1ULL 2) +#define XSTATE_BNDREGS (1ULL 3) +#define XSTATE_BNDCSR (1ULL 4) + /* CPUID feature words */ typedef enum FeatureWord { @@ -545,6 +548,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_ERMS (1 9) #define CPUID_7_0_EBX_INVPCID (1 10) #define CPUID_7_0_EBX_RTM (1 11) +#define CPUID_7_0_EBX_MPX (1 14) #define CPUID_7_0_EBX_RDSEED (1 18) #define CPUID_7_0_EBX_ADX (1 19) #define CPUID_7_0_EBX_SMAP (1 20) @@ -695,6 +699,16 @@ typedef union { uint64_t q; } MMXReg; +typedef struct BNDReg { +uint64_t lb; +uint64_t ub; +} BNDReg; + +typedef struct BNDCSReg { +uint64_t cfg; +uint64_t sts; +} BNDCSReg; + #ifdef HOST_WORDS_BIGENDIAN #define XMM_B(n) _b[15 - (n)] #define XMM_W(n) _w[7 - (n)] @@ -912,6 +926,8 @@ typedef struct CPUX86State { uint64_t xstate_bv; XMMReg ymmh_regs[CPU_NB_REGS]; +BNDReg bnd_regs[4]; +BNDCSReg bndcs_regs; uint64_t xcr0; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 749aa09..347d3d3 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -980,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_XMM_SPACE 40 #define XSAVE_XSTATE_BV 128 #define XSAVE_YMMH_SPACE 144 +#define XSAVE_BNDREGS 240 +#define XSAVE_BNDCSR 256 static int kvm_put_xsave(X86CPU *cpu) { @@ -1012,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu) *(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv; memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs, sizeof env-ymmh_regs); +memcpy(xsave-region[XSAVE_BNDREGS], env-bnd_regs, +sizeof env-bnd_regs); +memcpy(xsave-region[XSAVE_BNDCSR], env-bndcs_regs, +sizeof(env-bndcs_regs)); r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1294,6 +1300,10 @@ static int kvm_get_xsave(X86CPU *cpu) env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV]; memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE], sizeof env-ymmh_regs); +memcpy(env-bnd_regs, xsave-region[XSAVE_BNDREGS], +sizeof env-bnd_regs); +memcpy(env-bndcs_regs, xsave-region[XSAVE_BNDCSR], +sizeof(env-bndcs_regs)); return 0; } diff --git a/target-i386/machine.c b/target-i386/machine.c index e568da2..ca8be7d 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -63,6 +63,36 @@ static const VMStateDescription vmstate_ymmh_reg = { #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) +static const VMStateDescription vmstate_bnd_regs = { +.name = bnd_regs, +.version_id = 1, +.minimum_version_id = 1, +.minimum_version_id_old = 1, +.fields = (VMStateField []) { +VMSTATE_UINT64(lb, BNDReg), +VMSTATE_UINT64(ub, BNDReg), +VMSTATE_END_OF_LIST() +} +}; + +#define VMSTATE_BNDREG_VARS(_field, _state, _n, _v) \ +VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_bnd_regs, BNDReg) + +static const
Re: [Qemu-devel] [PATCH v8 4/5] hw/arm: add allwinner a10 SoC support
On Wed, Dec 4, 2013 at 6:09 PM, liguang lig.f...@cn.fujitsu.com wrote: Signed-off-by: liguang lig.f...@cn.fujitsu.com --- hw/arm/Makefile.objs |2 +- hw/arm/allwinner-a10.c | 77 include/hw/arm/allwinner-a10.h | 36 ++ 3 files changed, 114 insertions(+), 1 deletions(-) create mode 100644 hw/arm/allwinner-a10.c create mode 100644 include/hw/arm/allwinner-a10.h diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs index 3671b42..b9e5983 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o -obj-y += omap1.o omap2.o strongarm.o +obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c new file mode 100644 index 000..c4699b7 --- /dev/null +++ b/hw/arm/allwinner-a10.c @@ -0,0 +1,77 @@ +#include hw/sysbus.h +#include hw/devices.h +#include hw/arm/allwinner-a10.h + + +static void aw_a10_init(Object *obj) +{ +AwA10State *s = AW_A10(obj); +DeviceState *dev; + +object_initialize(s-cpu, sizeof(s-cpu), cortex-a8- TYPE_ARM_CPU); +object_property_add_child(obj, cpu, OBJECT(s-cpu), NULL); + +object_initialize(s-intc, sizeof(s-timer), TYPE_AW_A10_PIC); +dev = DEVICE(s-intc); You could just use DEVICE(foo) inline here and below and drop the dev variable to reduce verbosity. +qdev_set_parent_bus(dev, sysbus_get_default()); + +object_initialize(s-timer, sizeof(s-timer), TYPE_AW_A10_PIT); +dev = DEVICE(s-timer); +qdev_set_parent_bus(dev, sysbus_get_default()); +} + +static void aw_a10_realize(DeviceState *dev, Error **errp) +{ +AwA10State *s = AW_A10(dev); +SysBusDevice *sysbusdev; +uint8_t i; +Error *err = NULL; + +object_property_set_bool(OBJECT(s-cpu), true, realized, err); +s-cpu_irq[0] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_IRQ); +s-cpu_irq[1] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_FIQ); + +object_property_set_bool(OBJECT(s-intc), true, realized, err); You neither assert nor propagate the error. So the repeated usage below will cause a delayed (and somewhat obscure) assertion below if both happen to error for some reason. +sysbusdev = SYS_BUS_DEVICE(s-intc); +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIC_REG_BASE); +sysbus_connect_irq(sysbusdev, 0, s-cpu_irq[0]); +sysbus_connect_irq(sysbusdev, 1, s-cpu_irq[1]); +for (i = 0; i AW_A10_PIC_INT_NR; i++) { +s-irq[i] = qdev_get_gpio_in(DEVICE(s-intc), i); +} + +object_property_set_bool(OBJECT(s-timer), true, realized, err); +sysbusdev = SYS_BUS_DEVICE(s-timer); +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIT_REG_BASE); +sysbus_connect_irq(sysbusdev, 0, s-irq[22]); +sysbus_connect_irq(sysbusdev, 1, s-irq[23]); +sysbus_connect_irq(sysbusdev, 2, s-irq[24]); +sysbus_connect_irq(sysbusdev, 3, s-irq[25]); +sysbus_connect_irq(sysbusdev, 4, s-irq[67]); +sysbus_connect_irq(sysbusdev, 5, s-irq[68]); + +serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, s-irq[1], + 115200, serial_hds[0], DEVICE_NATIVE_ENDIAN); +} + +static void aw_a10_class_init(ObjectClass *oc, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(oc); + +dc-realize = aw_a10_realize; +} + +static const TypeInfo aw_a10_type_info = { +.name = TYPE_AW_A10, +.parent = TYPE_DEVICE, +.instance_size = sizeof(AwA10State), +.instance_init = aw_a10_init, +.class_init = aw_a10_class_init, +}; + +static void aw_a10_register_types(void) +{ +type_register_static(aw_a10_type_info); +} + +type_init(aw_a10_register_types) diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h new file mode 100644 index 000..a3e7b77 --- /dev/null +++ b/include/hw/arm/allwinner-a10.h @@ -0,0 +1,36 @@ +#ifndef ALLWINNER_H_ + +#include qemu-common.h +#include qemu/error-report.h +#include hw/char/serial.h +#include hw/arm/arm.h +#include hw/timer/allwinner-a10-pit.h +#include hw/intc/allwinner-a10-pic.h + +#include sysemu/sysemu.h +#include exec/address-spaces.h + + +#define AW_A10_PIC_REG_BASE 0x01c20400 +#define AW_A10_PIT_REG_BASE 0x01c20c00 +#define AW_A10_UART0_REG_BASE 0x01c28000 + +#define AW_A10_SDRAM_BASE 0x4000 + +#define TYPE_AW_A10 allwiner-a10 allwinner +#define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10) + +typedef struct AwA10State { +/* private */ +DeviceState parent_obj; +/* public */ + +ARMCPU cpu; +qemu_irq irq[AW_A10_PIC_INT_NR]; +qemu_irq cpu_irq[2]; I dont see the need to keep these as device state. They appear to be just local variables to realize(). +
[Qemu-devel] [PATCH v8 1/6] target-arm: Move call to disas_vfp_insn out of disas_coproc_insn.
Floating point is an extension to the instruction set rather than a coprocessor, so call it directly from the ARM and Thumb decode functions. Signed-off-by: Will Newton will.new...@linaro.org Reviewed-by: Peter Maydell peter.mayd...@linaro.org --- target-arm/translate.c | 32 +++- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index 5f003e7..f63e89d 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2636,6 +2636,14 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) rn != ARM_VFP_MVFR1 rn != ARM_VFP_MVFR0) return 1; } + +if (extract32(insn, 28, 4) == 0xf) { +/* Encodings with T=1 (Thumb) or unconditional (ARM): + * only used in v8 and above. + */ +return 1; +} + dp = ((insn 0xf00) == 0xb00); switch ((insn 24) 0xf) { case 0xe: @@ -6296,9 +6304,6 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn) return disas_dsp_insn(env, s, insn); } return 1; -case 10: -case 11: - return disas_vfp_insn (env, s, insn); default: break; } @@ -6753,6 +6758,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) goto illegal_op; return; } +if ((insn 0x0f000e10) == 0x0e000a00) { +/* VFP. */ +if (disas_vfp_insn(env, s, insn)) { +goto illegal_op; +} +return; +} if (((insn 0x0f30f000) == 0x0510f000) || ((insn 0x0f30f010) == 0x0710f000)) { if ((insn (1 22)) == 0) { @@ -8033,9 +8045,15 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) case 0xc: case 0xd: case 0xe: -/* Coprocessor. */ -if (disas_coproc_insn(env, s, insn)) +if (((insn 8) 0xe) == 10) { +/* VFP. */ +if (disas_vfp_insn(env, s, insn)) { +goto illegal_op; +} +} else if (disas_coproc_insn(env, s, insn)) { +/* Coprocessor. */ goto illegal_op; +} break; case 0xf: /* swi */ @@ -8765,6 +8783,10 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw insn = (insn 0xe2ff) | ((insn (1 28)) 4) | (1 28); if (disas_neon_data_insn(env, s, insn)) goto illegal_op; +} else if (((insn 8) 0xe) == 10) { +if (disas_vfp_insn(env, s, insn)) { +goto illegal_op; +} } else { if (insn (1 28)) goto illegal_op; -- 1.8.1.4
[Qemu-devel] [PATCH v8 2/6] target-arm: Implement ARMv8 VSEL instruction.
This adds support for the VSEL floating point selection instruction which was added in ARMv8. Signed-off-by: Will Newton will.new...@linaro.org Reviewed-by: Peter Maydell peter.mayd...@linaro.org --- target-arm/translate.c | 135 - 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index f63e89d..0a22ad8 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2614,6 +2614,139 @@ static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) return tmp; } +static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, + uint32_t dp) +{ +uint32_t cc = extract32(insn, 20, 2); + +if (dp) { +TCGv_i64 frn, frm, dest; +TCGv_i64 tmp, zero, zf, nf, vf; + +zero = tcg_const_i64(0); + +frn = tcg_temp_new_i64(); +frm = tcg_temp_new_i64(); +dest = tcg_temp_new_i64(); + +zf = tcg_temp_new_i64(); +nf = tcg_temp_new_i64(); +vf = tcg_temp_new_i64(); + +tcg_gen_extu_i32_i64(zf, cpu_ZF); +tcg_gen_ext_i32_i64(nf, cpu_NF); +tcg_gen_ext_i32_i64(vf, cpu_VF); + +tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn)); +tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm)); +switch (cc) { +case 0: /* eq: Z */ +tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, +frn, frm); +break; +case 1: /* vs: V */ +tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, +frn, frm); +break; +case 2: /* ge: N == V - N ^ V == 0 */ +tmp = tcg_temp_new_i64(); +tcg_gen_xor_i64(tmp, vf, nf); +tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, +frn, frm); +tcg_temp_free_i64(tmp); +break; +case 3: /* gt: !Z N == V */ +tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, +frn, frm); +tmp = tcg_temp_new_i64(); +tcg_gen_xor_i64(tmp, vf, nf); +tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, +dest, frm); +tcg_temp_free_i64(tmp); +break; +} +tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd)); +tcg_temp_free_i64(frn); +tcg_temp_free_i64(frm); +tcg_temp_free_i64(dest); + +tcg_temp_free_i64(zf); +tcg_temp_free_i64(nf); +tcg_temp_free_i64(vf); + +tcg_temp_free_i64(zero); +} else { +TCGv_i32 frn, frm, dest; +TCGv_i32 tmp, zero; + +zero = tcg_const_i32(0); + +frn = tcg_temp_new_i32(); +frm = tcg_temp_new_i32(); +dest = tcg_temp_new_i32(); +tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn)); +tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm)); +switch (cc) { +case 0: /* eq: Z */ +tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, +frn, frm); +break; +case 1: /* vs: V */ +tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, +frn, frm); +break; +case 2: /* ge: N == V - N ^ V == 0 */ +tmp = tcg_temp_new_i32(); +tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); +tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, +frn, frm); +tcg_temp_free_i32(tmp); +break; +case 3: /* gt: !Z N == V */ +tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, +frn, frm); +tmp = tcg_temp_new_i32(); +tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); +tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, +dest, frm); +tcg_temp_free_i32(tmp); +break; +} +tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd)); +tcg_temp_free_i32(frn); +tcg_temp_free_i32(frm); +tcg_temp_free_i32(dest); + +tcg_temp_free_i32(zero); +} + +return 0; +} + +static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) +{ +uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); + +if (!arm_feature(env, ARM_FEATURE_V8)) { +return 1; +} + +if (dp) { +VFP_DREG_D(rd, insn); +VFP_DREG_N(rn, insn); +VFP_DREG_M(rm, insn); +} else { +rd = VFP_SREG_D(insn); +rn = VFP_SREG_N(insn); +rm = VFP_SREG_M(insn); +} + +if ((insn 0x0f800e50) == 0x0e000a00) { +return handle_vsel(insn, rd, rn, rm, dp); +} +return 1; +} + /* Disassemble a VFP instruction. Returns nonzero if an error occurred (ie. an undefined instruction).
[Qemu-devel] [PATCH v8 6/6] target-arm: Implement ARMv8 SIMD VMAXNM and VMINNM instructions.
This adds support for the ARMv8 Advanced SIMD VMAXNM and VMINNM instructions. Signed-off-by: Will Newton will.new...@linaro.org --- target-arm/translate.c | 31 ++- 1 file changed, 22 insertions(+), 9 deletions(-) Changes in v8: - Use VFP helper instead of adding a NEON specific one - Drop size check diff --git a/target-arm/translate.c b/target-arm/translate.c index 9a8069e..73ed266 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4553,7 +4553,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ -#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */ +#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ static const uint8_t neon_3r_sizes[] = { [NEON_3R_VHADD] = 0x7, @@ -4586,7 +4586,7 @@ static const uint8_t neon_3r_sizes[] = { [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ -[NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */ +[NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */ }; /* Symbolic constants for op fields for Neon 2-register miscellaneous. @@ -4847,8 +4847,9 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins return 1; } break; -case NEON_3R_VRECPS_VRSQRTS: -if (u) { +case NEON_3R_FLOAT_MISC: +/* VMAXNM/VMINNM in ARMv8 */ +if (u !arm_feature(env, ARM_FEATURE_V8)) { return 1; } break; @@ -5137,11 +5138,23 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins tcg_temp_free_ptr(fpstatus); break; } -case NEON_3R_VRECPS_VRSQRTS: -if (size == 0) -gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); -else -gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); +case NEON_3R_FLOAT_MISC: +if (u) { +/* VMAXNM/VMINNM */ +TCGv_ptr fpstatus = get_fpstatus_ptr(1); +if (size == 0) { +gen_helper_vfp_maxnms(tmp, tmp, tmp2, fpstatus); +} else { +gen_helper_vfp_minnms(tmp, tmp, tmp2, fpstatus); +} +tcg_temp_free_ptr(fpstatus); +} else { +if (size == 0) { +gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); +} else { +gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); + } +} break; case NEON_3R_VFM: { -- 1.8.1.4
[Qemu-devel] [PATCH v8 0/6] target-arm: Add support for VSEL and VMIN/MAXNM.
This series adds support for three new instructions added in ARMv8 - VSEL, VMINNM and VMAXNM. Will Newton (6): target-arm: Move call to disas_vfp_insn out of disas_coproc_insn. target-arm: Implement ARMv8 VSEL instruction. softfloat: Remove unused argument from MINMAX macro. softfloat: Add minNum() and maxNum() functions to softfloat. target-arm: Implement ARMv8 FP VMAXNM and VMINNM instructions. target-arm: Implement ARMv8 SIMD VMAXNM and VMINNM instructions. fpu/softfloat.c | 38 ++-- include/fpu/softfloat.h | 4 + target-arm/helper.c | 25 + target-arm/helper.h | 5 + target-arm/translate.c | 246 +--- 5 files changed, 298 insertions(+), 20 deletions(-) -- 1.8.1.4
[Qemu-devel] [PATCH v8 5/6] target-arm: Implement ARMv8 FP VMAXNM and VMINNM instructions.
This adds support for the ARMv8 floating point VMAXNM and VMINNM instructions. Signed-off-by: Will Newton will.new...@linaro.org Reviewed-by: Peter Maydell peter.mayd...@linaro.org --- target-arm/helper.c| 25 + target-arm/helper.h| 5 + target-arm/translate.c | 50 ++ 3 files changed, 80 insertions(+) diff --git a/target-arm/helper.c b/target-arm/helper.c index 3445813..7507240 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -4079,3 +4079,28 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp) float_status *fpst = fpstp; return float64_muladd(a, b, c, 0, fpst); } + +/* ARMv8 VMAXNM/VMINNM */ +float32 VFP_HELPER(maxnm, s)(float32 a, float32 b, void *fpstp) +{ +float_status *fpst = fpstp; +return float32_maxnum(a, b, fpst); +} + +float64 VFP_HELPER(maxnm, d)(float64 a, float64 b, void *fpstp) +{ +float_status *fpst = fpstp; +return float64_maxnum(a, b, fpst); +} + +float32 VFP_HELPER(minnm, s)(float32 a, float32 b, void *fpstp) +{ +float_status *fpst = fpstp; +return float32_minnum(a, b, fpst); +} + +float64 VFP_HELPER(minnm, d)(float64 a, float64 b, void *fpstp) +{ +float_status *fpst = fpstp; +return float64_minnum(a, b, fpst); +} diff --git a/target-arm/helper.h b/target-arm/helper.h index cac9564..d459a39 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -132,6 +132,11 @@ DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_maxnmd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxnms, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_minnmd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minnms, f32, f32, f32, ptr) + DEF_HELPER_3(recps_f32, f32, f32, f32, env) DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) DEF_HELPER_2(recpe_f32, f32, f32, env) diff --git a/target-arm/translate.c b/target-arm/translate.c index 0a22ad8..9a8069e 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2723,6 +2723,54 @@ static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, return 0; } +static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn, +uint32_t rm, uint32_t dp) +{ +uint32_t vmin = extract32(insn, 6, 1); +TCGv_ptr fpst = get_fpstatus_ptr(0); + +if (dp) { +TCGv_i64 frn, frm, dest; + +frn = tcg_temp_new_i64(); +frm = tcg_temp_new_i64(); +dest = tcg_temp_new_i64(); + +tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn)); +tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm)); +if (vmin) { +gen_helper_vfp_minnmd(dest, frn, frm, fpst); +} else { +gen_helper_vfp_maxnmd(dest, frn, frm, fpst); +} +tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd)); +tcg_temp_free_i64(frn); +tcg_temp_free_i64(frm); +tcg_temp_free_i64(dest); +} else { +TCGv_i32 frn, frm, dest; + +frn = tcg_temp_new_i32(); +frm = tcg_temp_new_i32(); +dest = tcg_temp_new_i32(); + +tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn)); +tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm)); +if (vmin) { +gen_helper_vfp_minnms(dest, frn, frm, fpst); +} else { +gen_helper_vfp_maxnms(dest, frn, frm, fpst); +} +tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd)); +tcg_temp_free_i32(frn); +tcg_temp_free_i32(frm); +tcg_temp_free_i32(dest); +} + +tcg_temp_free_ptr(fpst); +return 0; +} + static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) { uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); @@ -2743,6 +2791,8 @@ static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) if ((insn 0x0f800e50) == 0x0e000a00) { return handle_vsel(insn, rd, rn, rm, dp); +} else if ((insn 0x0fb00e10) == 0x0e800a00) { +return handle_vminmaxnm(insn, rd, rn, rm, dp); } return 1; } -- 1.8.1.4
[Qemu-devel] [PATCH v8 3/6] softfloat: Remove unused argument from MINMAX macro.
The nan_exp argument is not used, so remove it. Signed-off-by: Will Newton will.new...@linaro.org Reviewed-by: Peter Maydell peter.mayd...@linaro.org --- fpu/softfloat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 7ba51b6..97bf627 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6706,7 +6706,7 @@ int float128_compare_quiet( float128 a, float128 b STATUS_PARAM ) * 'compare and pick one input' because that would mishandle * NaNs and +0 vs -0. */ -#define MINMAX(s, nan_exp) \ +#define MINMAX(s) \ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ int ismin STATUS_PARAM )\ { \ @@ -6747,8 +6747,8 @@ float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \ return float ## s ## _minmax(a, b, 0 STATUS_VAR); \ } -MINMAX(32, 0xff) -MINMAX(64, 0x7ff) +MINMAX(32) +MINMAX(64) /* Multiply A by 2 raised to the power N. */ -- 1.8.1.4
[Qemu-devel] [PATCH v8 4/6] softfloat: Add minNum() and maxNum() functions to softfloat.
Add floatnn_minnum() and floatnn_maxnum() functions which are equivalent to the minNum() and maxNum() functions from IEEE 754-2008. They are similar to min() and max() but differ in the handling of QNaN arguments. Signed-off-by: Will Newton will.new...@linaro.org --- fpu/softfloat.c | 32 +--- include/fpu/softfloat.h | 4 2 files changed, 33 insertions(+), 3 deletions(-) Changes in v8: - Use existing MINMAX macro to implement minnum and maxnum - Improve comment diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 97bf627..dbda61b 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -6705,10 +6705,17 @@ int float128_compare_quiet( float128 a, float128 b STATUS_PARAM ) /* min() and max() functions. These can't be implemented as * 'compare and pick one input' because that would mishandle * NaNs and +0 vs -0. + * + * minnum() and maxnum() functions. These are similar to the min() + * and max() functions but if one of the arguments is a QNaN and + * the other is numerical then the numerical argument is returned. + * minnum() and maxnum correspond to the IEEE 754-2008 minNum() + * and maxNum() operations. min() and max() are the typical min/max + * semantics provided by many CPUs which predate that specification. */ #define MINMAX(s) \ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ -int ismin STATUS_PARAM )\ +int ismin, int isieee STATUS_PARAM) \ { \ flag aSign, bSign; \ uint ## s ## _t av, bv; \ @@ -6716,6 +6723,15 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ b = float ## s ## _squash_input_denormal(b STATUS_VAR); \ if (float ## s ## _is_any_nan(a) || \ float ## s ## _is_any_nan(b)) { \ +if (isieee) { \ +if (float ## s ## _is_quiet_nan(a)\ +!float ## s ##_is_any_nan(b)) { \ +return b; \ +} else if (float ## s ## _is_quiet_nan(b) \ + !float ## s ## _is_any_nan(a)) { \ +return a; \ +} \ +} \ return propagateFloat ## s ## NaN(a, b STATUS_VAR); \ } \ aSign = extractFloat ## s ## Sign(a); \ @@ -6739,12 +6755,22 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ \ float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \ { \ -return float ## s ## _minmax(a, b, 1 STATUS_VAR); \ +return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR);\ } \ \ float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \ { \ -return float ## s ## _minmax(a, b, 0 STATUS_VAR); \ +return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR);\ +} \ +\ +float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \ +{ \ +return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR);\ +} \ +\ +float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \ +{ \ +return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR);\ } MINMAX(32) diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index f3927e2..2365274 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -302,6 +302,8 @@ int float32_compare( float32, float32 STATUS_PARAM ); int float32_compare_quiet( float32,
Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP
On 12/04/2013 08:33 PM, Markus Armbruster wrote: Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: Normally the user is expected to eject DVD if it is not locked by the guest. eject_device() makes few checks and calls bdrv_close() if DVD is not in use. However it is still possible to eject DVD even if it is in use. For that, QEMU sets eject requested flag, the guest reads it, issues ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case, bdrv_close() is not called anywhere so it remains inserted in QEMU's terms. This is expected behavior, and matches what IDE does. Markus, can you confirm? Confirmed. See commit 4be9762. Alexey, monitor commands eject does two things: it first opens the tray, and if that works, it removes the medium. If the tray is locked closed, it tells the device model that eject was requested. Works just like the physical eject button. With -f, it then rips out the medium. This is similar to opening the tray with a unbent paperclip. Let's ignore this case. The scsi-cd device model tells the guest about the eject request. A well-behaved guest will then command the device to unlock and open the tray. The guest uses the same commands on behalf of its applications, e.g. /usr/bin/eject. Your patch changes behavior of eject /dev/sr0 eject -t /dev/sr0: you no longer get the same medium back. You normally do with real hardware. The somewhat unfortunate consequence is that monitor command eject can only remove the medium when the tray is not locked. Oh. Wow. Nice :-/ Ok. So. It is expected that the real system will close the tray back if it was mounted, is not it? Right now, after eject info block is like this: cd1: virtimg/Fedora-19-ppc64-netinst.iso (raw) Removable device: locked, tray open And the mountpoint does not work in the guest. The state above even persists after umount in the guest. It only becomes correct again (tray==closed) when I mount DVD again. Is it all expected to work like this? Thanks. -- Alexey
Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL
On 12/04/2013 08:35 PM, Markus Armbruster wrote: Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: This prevents the guest from preventing DVD medium removal when there is no medium. Without this, if the user has ejected a DVD, it is possible to have a block device with an open tray, no media but locked. I need to check this against a real disk. Thanks! If I remember correctly, I did, and it works. You can still close the open tray in this state. I thought it is about NOT letting close the open tray. So the hardware can be programmed to ignore the hardware eject button when the tray is open? -- Alexey
Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode
On 12/04/2013 03:18 PM, Peter Maydell wrote: On 4 December 2013 10:58, Peter Crosthwaite peter.crosthwa...@xilinx.com wrote: So what im proposing is just a slightly more general patch. Is it really any more complicated than just applying your change pattern for the hyp mode? I think it would be, because of the wrinkle that hyp mode doesn't have a banked LR, so the existing assume we can just translate the mode into a single index good for both LR and SP logic would break. The minimal change if we wanted to keep VMSD bumps to a minimum would be to increase the size of the banked_spsr[] and banked_r13[] arrays to allow for Hyp mode but do nothing else (except add a comment about it I guess). If we want to bump VMSD just once for monitor + hypervisor mode then we need to add ELR_hyp register definition too. I think then it would be better to implement hypervisor mode and its special banking scheme, too. But I doubt it worth to combine these two things into one patch. The motiviation is less VMSD version bumps in ARM CPU (a place where I expect assume such version bumps to be considerable annoyance). Well, they're only a problem at the point where we start trying to support cross-version migration; we're not at that point yet... thanks -- PMM Best regards, Sergey Fedorov
Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode
On 4 December 2013 12:33, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/04/2013 03:18 PM, Peter Maydell wrote: On 4 December 2013 10:58, Peter Crosthwaite peter.crosthwa...@xilinx.com wrote: So what im proposing is just a slightly more general patch. Is it really any more complicated than just applying your change pattern for the hyp mode? I think it would be, because of the wrinkle that hyp mode doesn't have a banked LR, so the existing assume we can just translate the mode into a single index good for both LR and SP logic would break. The minimal change if we wanted to keep VMSD bumps to a minimum would be to increase the size of the banked_spsr[] and banked_r13[] arrays to allow for Hyp mode but do nothing else (except add a comment about it I guess). If we want to bump VMSD just once for monitor + hypervisor mode then we need to add ELR_hyp register definition too. I think then it would be better to implement hypervisor mode and its special banking scheme, too. But I doubt it worth to combine these two things into one patch. It's possible to add single new fields to the VMState without requiring a compatibility break, by marking the new field as only present in version X or greater; new elements on the end of arrays are a little fiddlier. But yes, I think we should just not worry about possible future Hyp mode now. Let's stick with your current patch. thanks -- PMM
[Qemu-devel] [PATCH] qemu-iotests: 074 fixups
A few things missing from the previous patch: * add 074 to the group file * label new output 074 instead of 048 * remove qemu-io prompts from output (qemu.git/master merge conflict) Signed-off-by: Stefan Hajnoczi stefa...@redhat.com --- Fam: can I squash these missing things into your patch? tests/qemu-iotests/074 | 0 tests/qemu-iotests/074.out | 10 +- tests/qemu-iotests/group | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) mode change 100644 = 100755 tests/qemu-iotests/074 diff --git a/tests/qemu-iotests/074 b/tests/qemu-iotests/074 old mode 100644 new mode 100755 diff --git a/tests/qemu-iotests/074.out b/tests/qemu-iotests/074.out index 3deb594..8fba5ae 100644 --- a/tests/qemu-iotests/074.out +++ b/tests/qemu-iotests/074.out @@ -1,17 +1,17 @@ -QA output created by 048 +QA output created by 074 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 === IO: pattern 102 -qemu-io wrote 512/512 bytes at offset 512 +wrote 512/512 bytes at offset 512 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qemu-io qemu-img: Error while reading offset 0 of blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +qemu-img: Error while reading offset 0 of blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error qemu-img: Error while reading offset 0: Input/output error 4 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=0 === IO: pattern 102 -qemu-io wrote 512/512 bytes at offset 512 +wrote 512/512 bytes at offset 512 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qemu-io qemu-img: Error while reading offset 0 of blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error +qemu-img: Error while reading offset 0 of blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error qemu-img: Error while reading offset 0 of blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error Warning: Image size mismatch! 4 diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 303e0f3..ed10720 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -77,3 +77,4 @@ 069 rw auto 070 rw auto 073 rw auto +074 rw auto -- 1.8.4.2
Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support
On 12/04/2013 03:13 PM, Peter Maydell wrote: On 4 December 2013 10:08, Fedorov Sergey s.fedo...@samsung.com wrote: On 12/03/2013 12:48 PM, Sergey Fedorov wrote: This patch set implements a basic support of CPU core TrustZone feature. We'd like this patch series finally to be merged into mainstream. What should be done to achieve this goal? I'd like to see TZ support in mainline too. That is the most important for me now :-) So I will try these patches to get shape suitable for mainline. Thanks! The high level answer is that it needs to get code reviewed, and you need to fix issues that are raised in code review. Unfortunately my review queue is currently pretty full (it has Allwinner board support, DIGIC board support, a bunch of Cadence fixes, ARMv8 32 bit new instructions and the A64 64 bit instruction support in it, all of which are fairly big patchsets), so it may take me a little while to get to this patchset. It is on my todo list though, so it won't get forgotten :-) thanks -- PMM Best regards, Sergey Fedorov
Re: [Qemu-devel] [Qemu-trivial] [PATCH] vl: remove (max_cpus 255) check from smp_parse
On Wed, Dec 04, 2013 at 04:50:59PM +1100, Alexey Kardashevskiy wrote: On 12/04/2013 01:47 AM, Eduardo Habkost wrote: On Tue, Dec 03, 2013 at 02:30:48PM +0100, Andreas Färber wrote: Am 03.12.2013 00:03, schrieb Alexey Kardashevskiy: On 12/03/2013 09:09 AM, Andreas Färber wrote: Am 02.12.2013 18:06, schrieb Michael Tokarev: 25.11.2013 07:39, Alexey Kardashevskiy wrote: Since modern POWER7/POWER8 chips can have more that 256 CPU threads (2000 actually), remove this check from smp_parse. The CPUs number is still checked against machine-max_cpus and this check should be enough not to break other archs. should be is not exactly the highest level of confidence for a trivial patch... :/ [...] Alexey, did you actually check that, e.g., x86 machines don't break with 256 or 257 CPUs now? PC_DEFAULT_MACHINE_OPTIONS sets it to 255. And I cannot find any machine which would not define max_cpus, have I missed any? If you've actually *checked* the other machines' code then fine with me, just say so in the commit message. :) I just grepped for max_cpus and checked every match. The largest values I found were: hw/ppc/spapr.c: 256 s390: 255 pc: 255 All the rest had values = 32. Machines with missing max_cpus value shouldn't be a problem, as max_cpus==0 is interpreted as 1 by the vl.c code. But we still need to add a check for max_cpus machine-max_cpus to vl.c, before we eliminate the smp_parse() check. Since smp_parse() checks if (max_cpus = smp_cpus), this should just work: diff --git a/vl.c b/vl.c index e6ed260..544165a 100644 --- a/vl.c +++ b/vl.c @@ -3882,9 +3882,9 @@ int main(int argc, char **argv, char **envp) smp_parse(qemu_opts_find(qemu_find_opts(smp-opts), NULL)); machine-max_cpus = machine-max_cpus ?: 1; /* Default to UP */ -if (smp_cpus machine-max_cpus) { +if (max_cpus machine-max_cpus) { fprintf(stderr, Number of SMP cpus requested (%d), exceeds max cpus -supported by machine `%s' (%d)\n, smp_cpus, machine-name, +supported by machine `%s' (%d)\n, max_cpus, machine-name, machine-max_cpus); exit(1); } There's also this, at main(): if (i == nb_numa_nodes) { for (i = 0; i max_cpus; i++) { set_bit(i, node_cpumask[i % nb_numa_nodes]); } } node_cpumask[] is initialized using bitmap_new(MAX_CPUMASK_BITS), and MAX_CPUMASK_BITS is 255. To fix this, we can initialize node_cpumask[] using max_cpus instead, if we initialize it after smp_parse(). Nope. At the moment when we parse -numa in vl.c, we may not know yet what machine is going to be used and machines can have different max_cpus. This will be changed by: Subject: [PATCH V17 04/11] NUMA: convert -numa option to use OptsVisitor Message-Id: 1386143939-19142-5-git-send-email-gaowanl...@cn.fujitsu.com http://article.gmane.org/gmane.comp.emulators.qemu/244826 For now, I would simply change MAX_CPUMASK_BITS to something crazy, like 16384 (2KB per numa node), I hope QEMU can survive such a memory waste :) Ok? I'm OK with that as long the code has proper checks in case max_cpus gets set to a crazily large value (larger than MAX_CPUMASK_BITS) in the far future, or if we prevent max_cpus from being larger than MAX_CPUMASK_BITS. -- Eduardo
Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL
Alexey Kardashevskiy a...@ozlabs.ru writes: On 12/04/2013 08:35 PM, Markus Armbruster wrote: Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: This prevents the guest from preventing DVD medium removal when there is no medium. Without this, if the user has ejected a DVD, it is possible to have a block device with an open tray, no media but locked. I need to check this against a real disk. Thanks! If I remember correctly, I did, and it works. You can still close the open tray in this state. I thought it is about NOT letting close the open tray. So the hardware can be programmed to ignore the hardware eject button when the tray is open? From memory: you can lock the medium whether the tray is open or not. The lock applies to the button operating the tray. It prevents medium eject. It never prevents closing an empty tray. I don't remember whether opening an empty tray is still permitted.
Re: [Qemu-devel] [PATCH] qemu-iotests: 074 fixups
On 2013年12月04日 20:45, Stefan Hajnoczi wrote: A few things missing from the previous patch: * add 074 to the group file * label new output 074 instead of 048 * remove qemu-io prompts from output (qemu.git/master merge conflict) Signed-off-by: Stefan Hajnoczi stefa...@redhat.com --- Fam: can I squash these missing things into your patch? Of course. Thanks. Fam
Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP
Alexey Kardashevskiy a...@ozlabs.ru writes: On 12/04/2013 08:33 PM, Markus Armbruster wrote: Paolo Bonzini pbonz...@redhat.com writes: Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto: Normally the user is expected to eject DVD if it is not locked by the guest. eject_device() makes few checks and calls bdrv_close() if DVD is not in use. However it is still possible to eject DVD even if it is in use. For that, QEMU sets eject requested flag, the guest reads it, issues ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case, bdrv_close() is not called anywhere so it remains inserted in QEMU's terms. This is expected behavior, and matches what IDE does. Markus, can you confirm? Confirmed. See commit 4be9762. Alexey, monitor commands eject does two things: it first opens the tray, and if that works, it removes the medium. If the tray is locked closed, it tells the device model that eject was requested. Works just like the physical eject button. With -f, it then rips out the medium. This is similar to opening the tray with a unbent paperclip. Let's ignore this case. The scsi-cd device model tells the guest about the eject request. A well-behaved guest will then command the device to unlock and open the tray. The guest uses the same commands on behalf of its applications, e.g. /usr/bin/eject. Your patch changes behavior of eject /dev/sr0 eject -t /dev/sr0: you no longer get the same medium back. You normally do with real hardware. The somewhat unfortunate consequence is that monitor command eject can only remove the medium when the tray is not locked. Oh. Wow. Nice :-/ Ok. So. It is expected that the real system will close the tray back if it was mounted, is not it? Right now, after eject info block is like this: cd1: virtimg/Fedora-19-ppc64-netinst.iso (raw) Removable device: locked, tray open And the mountpoint does not work in the guest. The state above even persists after umount in the guest. It only becomes correct again (tray==closed) when I mount DVD again. Is it all expected to work like this? Thanks. Can't reproduce, but can reproduce something similar. Freshly booted guest running RHEL-7 alpha, with the CD mounted: (qemu) info block cd cd: r7.iso (raw, read-only) Removable device: locked, tray closed Looks good. Try to eject: (qemu) eject cd Device 'cd' is locked Looks good. This should have signalled the guest user wants to eject. The guest should either ignore it, or unmount, unlock and eject. Apparently, it does that: (qemu) info block cd cd: r7.iso (raw, read-only) Removable device: locked, tray closed (qemu) eject cd Device 'cd' is locked (qemu) info block cd cd: r7.iso (raw, read-only) Removable device: locked, tray closed (qemu) info block cd cd: r7.iso (raw, read-only) Removable device: not locked, tray open Except it forgets to unmount! dmesg has VFS: busy inodes on changed media or resized disk sr0. Need somebody to find out how exactly this fails, and whether it's a guest bug or a QEMU bug.
Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default
The existing approach clearly doesn't support the full range of options that users specify on the command-line. Bugs. It will get fixed in time with more testing/debugging. Eduardo is working on improving the testing and RH's QA folks are working hard to shake out the bugs too. I just posted another bug fix patch to the whitelist a few days ago. Exactly, I'm working close with virt-test team to improve the testing and feedback for possible illegal syscalls on various scenarios. So I guess the options are: 1. Don't make it the default since it breaks stuff but use it for very specific scenarios (e.g. libvirt use cases that have been well tested). In my opinion, I think it was probably a bit premature to make enable it by default, but at some point in the future I think we do need to do this. I have to admit it was a little premature, yes. But I think once we have a stable set of tool in virt-test, we can turn it on by default in a near future. 2. Provide a kind of syscall set for various QEMU options and apply the union of them at launch. This still seems fragile but in theory it could work. This is what I was discussing above. I think this is likely the next big improvement. That's the feature I'm currently working on right now. We'll see some improvements in the future. :) -- Eduardo Otubo IBM Linux Technology Center
Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default
On 12/04/2013 07:39 AM, Stefan Hajnoczi wrote: On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote: Developers will only be happy with seccomp if it's easy and rewarding to support/debug. Agreed. As a developer, how do you feel about the audit/syslog based approach I mentioned earlier? I used the commands you posted (I think that's what you mean). They produce useful output. The problem is that without an error message on stderr or from the shell, no one will think QEMU process dead and hung == check seccomp immediately. It's frustrating to deal with a silent failure. The process dies with a SIGKILL, and sig handling in Qemu is hard to implement due to dozen of external linked libraries that has their own signal masks and conflicts with seccomp. I've already tried this approach in the past (you can find in the list by searching for debug mode) The optimal goal here is to use virt-test and audit log to eliminate these sorts of things. -- Eduardo Otubo IBM Linux Technology Center
Re: [Qemu-devel] [PATCH v6 0/6] Add cache mode option to qemu-iotests, and change default mode to writeback
On Wed, Dec 04, 2013 at 09:06:57AM +0800, Fam Zheng wrote: This series adds cache mode option in the iotests framework. Test cases are updated to make use of cache mode and mask supported modes. v6: [05] Recover disappeared two lines. (Benoît) Added Wenchao's reviewed-by lines to other patches. v5: Fix help test for -c mode. (Wenchao) v4: Address Stefan's comments: Add _default_cache_mode. Split last two cases in 048 to 074. Use long option --cache instead of -t for qemu-io. v3: Change _unsupported_qemu_io_options to _supported_cache_modes. Change default mode to writeback. Clean up some whitespaces in the end of series. Fix 026.out.nocache case. Fix 048 case on tmpfs. Fam Zheng (6): qemu-iotests: Add -c cache-mode option qemu-iotests: Honour cache mode in iotests.py qemu-iotests: Add _default_cache_mode and _supported_cache_modes qemu-iotests: Change default cache mode to writeback qemu-iotests: Clean up spaces in usage output qemu-iotests: Split qcow2 only cases in 048 tests/qemu-iotests/026| 3 +- tests/qemu-iotests/039| 3 +- tests/qemu-iotests/048| 27 -- tests/qemu-iotests/048.out| 16 tests/qemu-iotests/052| 4 +- tests/qemu-iotests/074| 86 +++ tests/qemu-iotests/074.out| 18 + tests/qemu-iotests/check | 2 +- tests/qemu-iotests/common | 37 ++- tests/qemu-iotests/common.rc | 25 - tests/qemu-iotests/iotests.py | 3 +- 11 files changed, 155 insertions(+), 69 deletions(-) create mode 100644 tests/qemu-iotests/074 create mode 100644 tests/qemu-iotests/074.out Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
Re: [Qemu-devel] [PATCH 3/5] Add vhostsock option
On Fri, Nov 29, 2013 at 08:52:24PM +0100, Antonios Motakis wrote: @@ -91,15 +91,27 @@ static int vhost_net_get_fd(NetClientState *backend) } } -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, - bool force) +struct vhost_net *vhost_net_init(NetClientState *backend, char *vhostsock, + int devfd, bool force) { int r; struct vhost_net *net = g_malloc(sizeof *net); +const char *backend_sock = 0; +VhostBackendType backend_type = VHOST_BACKEND_TYPE_NONE; + if (!backend) { fprintf(stderr, vhost-net requires backend to be setup\n); goto fail; } + +if (vhostsock strcmp(vhostsock, VHOST_NET_DEFAULT_SOCK) != 0) { This is a weird hack. Why check for VHOST_NET_DEFAULT_SOCK at all? If the option is not present then kernel vhost is used, if the option is present then userspace vhost is used. I don't understand why a magic hardcoded path is useful.
Re: [Qemu-devel] [PATCH 2/5] Add vhost-kernel and the vhost-user skeleton
On Fri, Nov 29, 2013 at 08:52:23PM +0100, Antonios Motakis wrote: diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 05de174..80defe4 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -25,9 +25,18 @@ static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, int vhost_call(struct vhost_dev *dev, unsigned long int request, void *arg) { -int result; +int result = -1; -result = vhost_kernel_call(dev, request, arg); +switch (dev-backend_type) { +case VHOST_BACKEND_TYPE_KERNEL: +result = vhost_kernel_call(dev, request, arg); +break; +case VHOST_BACKEND_TYPE_USER: +fprintf(stderr, vhost-user not implemented\n); +break; +default: +fprintf(stderr, Unknown vhost backend type\n); +} The switch statement approach gets messy fast when local variables are needed inside some case labels. It also makes it hard to conditionally compile features without using #ifdef. Perhaps instead a VhostOps struct could be used: /* Vhost backends implement this interface */ typedef struct { int vhost_call(struct vhost_dev *dev, unsigned long int request, void *arg); ... } VhostOps; const VhostOps vhost_kernel_ops = { ... }; const VhostOps vhost_user_opts = { ... }; ret = dev-vhost_ops-vhost_call(dev, request, arg); Something along those lines. It keeps the different backend implementations separate (they can live in separate files and be conditional in Makefile.objs).
Re: [Qemu-devel] [RFC 0/5] Vhost and vhost-net support for userspace based backends
On Fri, Nov 29, 2013 at 08:52:21PM +0100, Antonios Motakis wrote: In this patch series we would like to introduce our approach for putting a virtio-net backend in an external userspace process. Our eventual target is to run the network backend in the Snabbswitch ethernet switch, while receiving traffic from a guest inside QEMU/KVM which runs an unmodified virtio-net implementation. For this, we are working into extending vhost to allow equivalent functionality for userspace. Vhost already passes control of the data plane of virtio-net to the host kernel; we want to realize a similar model, but for userspace. In this patch series the concept of a vhost-backend is introduced. We define two vhost backend types - vhost-kernel and vhost-user. The former is the interface to the current kernel module implementation. Its control plane is ioctl based. The data plane is the kernel directly accessing the QEMU allocated, guest memory. In the new vhost-user backend, the control plane is based on communication between QEMU and another userspace process using a unix domain socket. This allows to implement a virtio backend for a guest running in QEMU, inside the other userspace process. One thing that came to mind when reading the patches is that you are implementing the vhost interface pretty much exactly as-is. Did you look at FUSE's character devices in userspace (CUSE)? IIRC even ioctl is supported so you might be able to skip the userspace backend entirely if you mimic vhost_net.ko's ioctl interface. Then all that's needed is some configuration/startup code to use shared memory and pass the eventfds. Stefan
Re: [Qemu-devel] [PATCH 3/4] dataplane: change vring API to use VirtQueueElement
On Thu, Oct 10, 2013 at 05:07:18PM +0200, Paolo Bonzini wrote: @@ -298,30 +278,31 @@ static void handle_notify(EventNotifier *e) vring_disable_notification(s-vdev, s-vring); for (;;) { -head = vring_pop(s-vdev, s-vring, iov, end, out_num, in_num); -if (head 0) { +ret = vring_pop(s-vdev, s-vring, elem); +if (ret 0) { +assert(elem == NULL); break; /* no more requests */ } -trace_virtio_blk_data_plane_process_request(s, out_num, in_num, -head); +trace_virtio_blk_data_plane_process_request(s, elem-out_num, +elem-in_num, elem-index); -if (process_request(s-ioqueue, iov, out_num, in_num, head) 0) { +if (process_request(s-ioqueue, elem) 0) { vring_set_broken(s-vring); +vring_push(s-vring, elem, 0); If we give up on the vring I don't think we should push the element back. It may cause the guest to panic. I guess what we really need here is to unmap scatter-gather buffers and delete elem. Stefan
Re: [Qemu-devel] [PATCH 0/4] dataplane: use more of the generic virtio data structures, drop hostmem
On Thu, Oct 10, 2013 at 05:07:15PM +0200, Paolo Bonzini wrote: Now that the memory API is thread-safe, we can use it in virtio-blk-dataplane and replace hostmem.[ch]. This series does this, and also changes the vring API to use VirtQueueElement (with an eye towards migration). With this change, virtio-blk-dataplane is also safe against memory hot-unplug. The next step would be to replace memory_region_find with address_space_{map,unmap}, which handle dirtying of memory correctly. However these APIs are not thread-safe yet, and neither is the handling of dirty memory (Juan's patches may be a start here). Also, the usage of iov_discard_{front,back} may cause some complication when we use address_space_{map,unmap}. We may have to change a bit the logic in virtio-blk-dataplane to switch to address_space_{map,unmap}. If we do not want to do this intermediate step, the first three patches can be applied separately from the fourth. Paolo Bonzini (4): vring: create a common function to parse descriptors vring: factor common code for error exits dataplane: change vring API to use VirtQueueElement dataplane: replace hostmem with memory_region_find hw/block/dataplane/virtio-blk.c | 86 +--- hw/virtio/dataplane/Makefile.objs | 2 +- hw/virtio/dataplane/hostmem.c | 183 - hw/virtio/dataplane/vring.c | 244 +- include/hw/virtio/dataplane/hostmem.h | 58 include/hw/virtio/dataplane/vring.h | 9 +- 6 files changed, 193 insertions(+), 389 deletions(-) delete mode 100644 hw/virtio/dataplane/hostmem.c delete mode 100644 include/hw/virtio/dataplane/hostmem.h Finally looked into this series. I think we should merge all patches so we start exercising the thread-safe memory API as soon as possible in the QEMU 2.0 release cycle. Before merging I wanted to discuss the vring-broken case which I've left a comment on. Stefan
Re: [Qemu-devel] [PATCH 3/5] Add vhostsock option
On 11/29/2013 12:52 PM, Antonios Motakis wrote: Adding a new tap network backend option - vhostsock. It points to a named unix domain socket, that will be used to communicate with vhost-user backend. This is a temporary work around; in future versions of this series vhost-user will be made independent from the tap network backend. +++ b/qapi-schema.json @@ -2891,6 +2891,8 @@ # # @vhostforce: #optional vhost on for non-MSIX virtio guests # +# @vhostsock: #optional vhost backend socket +# Needs a (since 2.0) designation. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
[Qemu-devel] [PATCH 04/32] tests: Fix schema parser test for in-tree build
From: Markus Armbruster arm...@redhat.com Commit 4f193e3 added the test, but screwed up in-tree builds (SRCDIR=.): the tests's output overwrites the expected output, and is thus compared to itself. Cc: qemu-sta...@nongnu.org Reported-by: Laszlo Ersek ler...@redhat.com Reviewed-by: Andreas Färber afaer...@suse.de Reviewed-by: Laszlo Ersek ler...@redhat.com Signed-off-by: Markus Armbruster arm...@redhat.com Signed-off-by: Michael Tokarev m...@tls.msk.ru (cherry picked from commit d8039e58b1ecfdc9af171502c83e3949f6dafb95) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- tests/.gitignore |1 + tests/Makefile |8 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/.gitignore b/tests/.gitignore index fb05c2a..d9c2ef4 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -19,3 +19,4 @@ test-thread-pool test-x86-cpuid test-xbzrle *-test +qapi-schema/*.test.* diff --git a/tests/Makefile b/tests/Makefile index d044908..ad98439 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -247,10 +247,10 @@ check-tests/test-qapi.py: tests/test-qapi.py .PHONY: $(patsubst %, check-%, $(check-qapi-schema-y)) $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json - $(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts $(PYTHON) $(SRC_PATH)/tests/qapi-schema/test-qapi.py $^ $*.out 2$*.err; echo $$? $*.exit, TEST $*.out) - @diff -q $(SRC_PATH)/$*.out $*.out - @diff -q $(SRC_PATH)/$*.err $*.err - @diff -q $(SRC_PATH)/$*.exit $*.exit + $(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts $(PYTHON) $(SRC_PATH)/tests/qapi-schema/test-qapi.py $^ $*.test.out 2$*.test.err; echo $$? $*.test.exit, TEST $*.out) + @diff -q $(SRC_PATH)/$*.out $*.test.out + @diff -q $(SRC_PATH)/$*.err $*.test.err + @diff -q $(SRC_PATH)/$*.exit $*.test.exit # Consolidated targets -- 1.7.9.5
[Qemu-devel] [PATCH 05/32] tests: Update .gitignore for test-int128 and test-bitops
From: Markus Armbruster arm...@redhat.com Forgotten in commit 6046c62 and 3464700. Cc: qemu-sta...@nongnu.org Reviewed-by: Andreas Färber afaer...@suse.de Reviewed-by: Laszlo Ersek ler...@redhat.com Signed-off-by: Markus Armbruster arm...@redhat.com Signed-off-by: Michael Tokarev m...@tls.msk.ru (cherry picked from commit 9dbb52e862458935c250bac9e71d5a87da4e33e9) Conflicts: tests/.gitignore *removed post-1.6 additions from diff Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- tests/.gitignore |2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/.gitignore b/tests/.gitignore index d9c2ef4..9ac044d 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -5,8 +5,10 @@ check-qjson check-qlist check-qstring test-aio +test-bitops test-cutils test-hbitmap +test-int128 test-iov test-mul64 test-qapi-types.[ch] -- 1.7.9.5
[Qemu-devel] Patch Round-up for stable 1.6.2, freeze on 2013-12-06
Hi everyone, The following new patches are queued for QEMU stable v1.6.2: https://github.com/mdroth/qemu/commits/stable-1.6-staging The release is planned for 2013-12-09: http://wiki.qemu.org/Planning/1.6 Please respond here or CC qemu-sta...@nongnu.org on any patches you think should be included in the release. The cut-off date is has been extended to 2013-12-06 due to the round-up email going out late. Testing/feedback is greatly appreciated. Thanks! Alex Williamson (1): vfio-pci: Fix multifunction=on Alexey Kardashevskiy (1): memory: fix 128 arithmetic in info mtree Amit Shah (3): char: move backends' io watch tag to CharDriverState char: use common function to disable callbacks on chardev close char: remove watch callback on chardev detach from frontend Amos Kong (2): virtio-net: fix the memory leak in rxfilter_notify() rng-egd: offset the point when repeatedly read from the buffer Bandan Das (1): pci: unregister vmstate_pcibus on unplug Cole Robinson (1): Fix pc migration from qemu = 1.5 Fam Zheng (1): vmdk: Fix vmdk_parse_extents Hans de Goede (1): audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second Igor Mammedov (1): qdev-monitor: Fix crash when device_add is called with abstract driver Jason Wang (1): virtio-net: only delete bh that existed Markus Armbruster (2): tests: Fix schema parser test for in-tree build tests: Update .gitignore for test-int128 and test-bitops Matthew Daley (1): xen_disk: mark ioreq as mapped before unmapping in error case Max Filippov (1): exec: fix breakpoint_invalidate when pc may not be translated Max Reitz (1): qcow2: count_contiguous_clusters and compression Mike Frysinger (1): configure: detect endian via compile test Paolo Bonzini (1): monitor: eliminate monitor_event_state_lock Peter Lieven (1): qcow2: fix possible corruption when reading multiple clusters Peter Maydell (1): configure: Explicitly set ARFLAGS so we can build with GNU Make 4.0 Richard Henderson (1): Adjust qapi-visit for python-2.4.3 Stefan Hajnoczi (1): qdev-monitor: Unref device when device_add fails Stefan Weil (5): tci: Add implementation of rotl_i64, rotr_i64 bitops: Add rotate functions (rol8, ror8, ...) misc: Use new rotate functions qemu-char: Fix potential out of bounds access to local arrays linux-user: Fix stat64 syscall for SPARC64 Vlad Yasevich (1): qom: Fix memory leak in object_property_set_link() Wenchao Xia (2): qapi: fix memleak by adding implict struct functions in dealloc visitor tests: fix memleak in error path test for input visitor audio/audio.c |3 +- backends/rng-egd.c |4 +- block/qcow2-cluster.c |7 +++- block/vmdk.c |7 +++- configure | 45 + exec.c |6 ++- hw/block/xen_disk.c|1 + hw/misc/vfio.c |7 hw/net/virtio-net.c| 10 ++--- hw/pci-host/piix.c |9 - hw/pci-host/q35.c | 10 - hw/pci/pci.c |8 include/hw/i386/pc.h |8 include/hw/pci-host/q35.h |1 + include/qemu/bitops.h | 80 + include/sysemu/char.h |1 + linux-user/syscall.c |6 +-- linux-user/syscall_defs.h | 14 +++ memory.c |4 +- monitor.c |6 --- qapi/qapi-dealloc-visitor.c| 20 ++ qdev-monitor.c |8 qemu-char.c| 86 +++- qom/object.c |5 ++- scripts/qapi-visit.py | 17 ++-- target-arm/iwmmxt_helper.c |2 +- tcg/optimize.c | 12 ++ tcg/tci/tcg-target.c |1 - tci.c | 14 +-- tests/.gitignore |3 ++ tests/Makefile |8 ++-- tests/test-qmp-input-visitor.c |1 + 32 files changed, 287 insertions(+), 127 deletions(-)
[Qemu-devel] [PATCH 07/32] bitops: Add rotate functions (rol8, ror8, ...)
From: Stefan Weil s...@weilnetz.de These functions were copies from include/linux/bitopts.h. Signed-off-by: Stefan Weil s...@weilnetz.de Reviewed-by: Richard Henderson r...@twiddle.net (cherry picked from commit 6aa25b4a7bb10c48c3054f268d5be98e42ea42c0) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- include/qemu/bitops.h | 80 + 1 file changed, 80 insertions(+) diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 06e2e6f..304c90c 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -184,6 +184,86 @@ static inline unsigned long hweight_long(unsigned long w) } /** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint8_t rol8(uint8_t word, unsigned int shift) +{ +return (word shift) | (word (8 - shift)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint8_t ror8(uint8_t word, unsigned int shift) +{ +return (word shift) | (word (8 - shift)); +} + +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint16_t rol16(uint16_t word, unsigned int shift) +{ +return (word shift) | (word (16 - shift)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint16_t ror16(uint16_t word, unsigned int shift) +{ +return (word shift) | (word (16 - shift)); +} + +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint32_t rol32(uint32_t word, unsigned int shift) +{ +return (word shift) | (word (32 - shift)); +} + +/** + * ror32 - rotate a 32-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint32_t ror32(uint32_t word, unsigned int shift) +{ +return (word shift) | (word (32 - shift)); +} + +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint64_t rol64(uint64_t word, unsigned int shift) +{ +return (word shift) | (word (64 - shift)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline uint64_t ror64(uint64_t word, unsigned int shift) +{ +return (word shift) | (word (64 - shift)); +} + +/** * extract32: * @value: the value to extract the bit field from * @start: the lowest bit in the bit field (numbered from 0) -- 1.7.9.5
[Qemu-devel] [PATCH 01/32] char: move backends' io watch tag to CharDriverState
From: Amit Shah amit.s...@redhat.com All the backends implement an io watcher tag for callbacks. Move it to CharDriverState from each backend's struct to make accessing the tag from backend-neutral functions easier. This will be used later to cancel a callback on chardev detach from a frontend. CC: qemu-sta...@nongnu.org Reviewed-by: Gerd Hoffmann kra...@redhat.com Signed-off-by: Amit Shah amit.s...@redhat.com (cherry picked from commit 7ba9addc165b37b764baa08c02518b15b2361707) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- include/sysemu/char.h |1 + qemu-char.c | 77 + 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 8053130..ad101d9 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -78,6 +78,7 @@ struct CharDriverState { int explicit_be_open; int avail_connections; int is_mux; +guint fd_in_tag; QemuOpts *opts; QTAILQ_ENTRY(CharDriverState) next; }; diff --git a/qemu-char.c b/qemu-char.c index 1621fbd..1dc1646 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -798,7 +798,6 @@ static int io_channel_send(GIOChannel *fd, const void *buf, size_t len) typedef struct FDCharDriver { CharDriverState *chr; GIOChannel *fd_in, *fd_out; -guint fd_in_tag; int max_size; QTAILQ_ENTRY(FDCharDriver) node; } FDCharDriver; @@ -830,9 +829,9 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) status = g_io_channel_read_chars(chan, (gchar *)buf, len, bytes_read, NULL); if (status == G_IO_STATUS_EOF) { -if (s-fd_in_tag) { -io_remove_watch_poll(s-fd_in_tag); -s-fd_in_tag = 0; +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; } qemu_chr_be_event(chr, CHR_EVENT_CLOSED); return FALSE; @@ -863,13 +862,14 @@ static void fd_chr_update_read_handler(CharDriverState *chr) { FDCharDriver *s = chr-opaque; -if (s-fd_in_tag) { -io_remove_watch_poll(s-fd_in_tag); -s-fd_in_tag = 0; +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; } if (s-fd_in) { -s-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll, fd_chr_read, chr); +chr-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll, + fd_chr_read, chr); } } @@ -877,9 +877,9 @@ static void fd_chr_close(struct CharDriverState *chr) { FDCharDriver *s = chr-opaque; -if (s-fd_in_tag) { -io_remove_watch_poll(s-fd_in_tag); -s-fd_in_tag = 0; +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; } if (s-fd_in) { @@ -1012,7 +1012,6 @@ static CharDriverState *qemu_chr_open_stdio(ChardevStdio *opts) typedef struct { GIOChannel *fd; -guint fd_tag; int connected; int read_bytes; guint timer_tag; @@ -1123,9 +1122,9 @@ static void pty_chr_state(CharDriverState *chr, int connected) PtyCharDriver *s = chr-opaque; if (!connected) { -if (s-fd_tag) { -io_remove_watch_poll(s-fd_tag); -s-fd_tag = 0; +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; } s-connected = 0; /* (re-)connect poll interval for idle guests: once per second. @@ -1140,7 +1139,8 @@ static void pty_chr_state(CharDriverState *chr, int connected) if (!s-connected) { s-connected = 1; qemu_chr_be_generic_open(chr); -s-fd_tag = io_add_watch_poll(s-fd, pty_chr_read_poll, pty_chr_read, chr); +chr-fd_in_tag = io_add_watch_poll(s-fd, pty_chr_read_poll, + pty_chr_read, chr); } } } @@ -1151,9 +1151,9 @@ static void pty_chr_close(struct CharDriverState *chr) PtyCharDriver *s = chr-opaque; int fd; -if (s-fd_tag) { -io_remove_watch_poll(s-fd_tag); -s-fd_tag = 0; +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; } fd = g_io_channel_unix_get_fd(s-fd); g_io_channel_unref(s-fd); @@ -2161,7 +2161,6 @@ static CharDriverState *qemu_chr_open_stdio(ChardevStdio *opts) typedef struct { int fd; GIOChannel *chan; -guint tag; uint8_t buf[READ_BUF_LEN]; int bufcnt; int bufptr; @@ -2217,9 +2216,9 @@ static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) s-bufcnt = bytes_read; s-bufptr = s-bufcnt; if (status != G_IO_STATUS_NORMAL) { -if (s-tag) { -io_remove_watch_poll(s-tag); -s-tag = 0; +if (chr-fd_in_tag) { +
[Qemu-devel] [PATCH 06/32] tci: Add implementation of rotl_i64, rotr_i64
From: Stefan Weil s...@weilnetz.de It is used by qemu-ppc64 when running Debian's busybox-static. Cc: qemu-stable qemu-sta...@nongnu.org Signed-off-by: Stefan Weil s...@weilnetz.de Reviewed-by: Richard Henderson r...@twiddle.net (cherry picked from commit d285bf784b6234e994ce73c05c82c9fb6429df00) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- tcg/tci/tcg-target.c |1 - tci.c| 10 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index e118bc7..7f86a7f 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -677,7 +677,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: -/* TODO: Implementation of rotl_i64, rotr_i64 missing in tci.c. */ case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ tcg_out_r(s, args[0]); diff --git a/tci.c b/tci.c index af58576..b09ad25 100644 --- a/tci.c +++ b/tci.c @@ -952,8 +952,16 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) break; #if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: +t0 = *tb_ptr++; +t1 = tci_read_ri64(tb_ptr); +t2 = tci_read_ri64(tb_ptr); +tci_write_reg64(t0, (t1 t2) | (t1 (64 - t2))); +break; case INDEX_op_rotr_i64: -TODO(); +t0 = *tb_ptr++; +t1 = tci_read_ri64(tb_ptr); +t2 = tci_read_ri64(tb_ptr); +tci_write_reg64(t0, (t1 t2) | (t1 (64 - t2))); break; #endif #if TCG_TARGET_HAS_deposit_i64 -- 1.7.9.5
[Qemu-devel] [PATCH 02/32] char: use common function to disable callbacks on chardev close
From: Amit Shah amit.s...@redhat.com This deduplicates code used a lot of times. CC: qemu-sta...@nongnu.org Reviewed-by: Gerd Hoffmann kra...@redhat.com Signed-off-by: Amit Shah amit.s...@redhat.com (cherry picked from commit 26da70c72524eb22c946ab19ec98a217b8252f7e) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qemu-char.c | 62 ++- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index 1dc1646..fa00517 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -725,6 +725,14 @@ static void io_remove_watch_poll(guint tag) g_source_destroy(iwp-parent); } +static void remove_fd_in_watch(CharDriverState *chr) +{ +if (chr-fd_in_tag) { +io_remove_watch_poll(chr-fd_in_tag); +chr-fd_in_tag = 0; +} +} + #ifndef _WIN32 static GIOChannel *io_channel_from_fd(int fd) { @@ -829,10 +837,7 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) status = g_io_channel_read_chars(chan, (gchar *)buf, len, bytes_read, NULL); if (status == G_IO_STATUS_EOF) { -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); return FALSE; } @@ -862,11 +867,7 @@ static void fd_chr_update_read_handler(CharDriverState *chr) { FDCharDriver *s = chr-opaque; -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} - +remove_fd_in_watch(chr); if (s-fd_in) { chr-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll, fd_chr_read, chr); @@ -877,11 +878,7 @@ static void fd_chr_close(struct CharDriverState *chr) { FDCharDriver *s = chr-opaque; -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} - +remove_fd_in_watch(chr); if (s-fd_in) { g_io_channel_unref(s-fd_in); } @@ -1122,10 +1119,7 @@ static void pty_chr_state(CharDriverState *chr, int connected) PtyCharDriver *s = chr-opaque; if (!connected) { -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); s-connected = 0; /* (re-)connect poll interval for idle guests: once per second. * We check more frequently in case the guests sends data to @@ -1151,10 +1145,7 @@ static void pty_chr_close(struct CharDriverState *chr) PtyCharDriver *s = chr-opaque; int fd; -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); fd = g_io_channel_unix_get_fd(s-fd); g_io_channel_unref(s-fd); close(fd); @@ -2216,10 +2207,7 @@ static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) s-bufcnt = bytes_read; s-bufptr = s-bufcnt; if (status != G_IO_STATUS_NORMAL) { -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); return FALSE; } @@ -2237,11 +2225,7 @@ static void udp_chr_update_read_handler(CharDriverState *chr) { NetCharDriver *s = chr-opaque; -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} - +remove_fd_in_watch(chr); if (s-chan) { chr-fd_in_tag = io_add_watch_poll(s-chan, udp_chr_read_poll, udp_chr_read, chr); @@ -2251,10 +2235,8 @@ static void udp_chr_update_read_handler(CharDriverState *chr) static void udp_chr_close(CharDriverState *chr) { NetCharDriver *s = chr-opaque; -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} + +remove_fd_in_watch(chr); if (s-chan) { g_io_channel_unref(s-chan); closesocket(s-fd); @@ -2489,10 +2471,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) if (s-listen_chan) { s-listen_tag = g_io_add_watch(s-listen_chan, G_IO_IN, tcp_chr_accept, chr); } -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); g_io_channel_unref(s-chan); s-chan = NULL; closesocket(s-fd); @@ -2606,10 +2585,7 @@ static void tcp_chr_close(CharDriverState *chr) { TCPCharDriver *s = chr-opaque; if (s-fd = 0) { -if (chr-fd_in_tag) { -io_remove_watch_poll(chr-fd_in_tag); -chr-fd_in_tag = 0; -} +remove_fd_in_watch(chr); if (s-chan) {
[Qemu-devel] [PATCH 08/32] misc: Use new rotate functions
From: Stefan Weil s...@weilnetz.de Signed-off-by: Stefan Weil s...@weilnetz.de (cherry picked from commit 3df2b8fde949be86d8a78923c992fdd698d4ea4c) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- target-arm/iwmmxt_helper.c |2 +- tcg/optimize.c | 12 tci.c |8 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c index 7953b53..e6cfa62 100644 --- a/target-arm/iwmmxt_helper.c +++ b/target-arm/iwmmxt_helper.c @@ -577,7 +577,7 @@ uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n) uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n) { -x = (x n) | (x (64 - n)); +x = ror64(x, n); env-iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); return x; } diff --git a/tcg/optimize.c b/tcg/optimize.c index b35868a..adb5258 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -236,20 +236,16 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) return (int64_t)x (int64_t)y; case INDEX_op_rotr_i32: -x = ((uint32_t)x (32 - y)) | ((uint32_t)x y); -return x; +return ror32(x, y); case INDEX_op_rotr_i64: -x = ((uint64_t)x (64 - y)) | ((uint64_t)x y); -return x; +return ror64(x, y); case INDEX_op_rotl_i32: -x = ((uint32_t)x y) | ((uint32_t)x (32 - y)); -return x; +return rol32(x, y); case INDEX_op_rotl_i64: -x = ((uint64_t)x y) | ((uint64_t)x (64 - y)); -return x; +return rol64(x, y); CASE_OP_32_64(not): return ~x; diff --git a/tci.c b/tci.c index b09ad25..53c4b66 100644 --- a/tci.c +++ b/tci.c @@ -688,13 +688,13 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = *tb_ptr++; t1 = tci_read_ri32(tb_ptr); t2 = tci_read_ri32(tb_ptr); -tci_write_reg32(t0, (t1 t2) | (t1 (32 - t2))); +tci_write_reg32(t0, rol32(t1, t2)); break; case INDEX_op_rotr_i32: t0 = *tb_ptr++; t1 = tci_read_ri32(tb_ptr); t2 = tci_read_ri32(tb_ptr); -tci_write_reg32(t0, (t1 t2) | (t1 (32 - t2))); +tci_write_reg32(t0, ror32(t1, t2)); break; #endif #if TCG_TARGET_HAS_deposit_i32 @@ -955,13 +955,13 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) t0 = *tb_ptr++; t1 = tci_read_ri64(tb_ptr); t2 = tci_read_ri64(tb_ptr); -tci_write_reg64(t0, (t1 t2) | (t1 (64 - t2))); +tci_write_reg64(t0, rol64(t1, t2)); break; case INDEX_op_rotr_i64: t0 = *tb_ptr++; t1 = tci_read_ri64(tb_ptr); t2 = tci_read_ri64(tb_ptr); -tci_write_reg64(t0, (t1 t2) | (t1 (64 - t2))); +tci_write_reg64(t0, ror64(t1, t2)); break; #endif #if TCG_TARGET_HAS_deposit_i64 -- 1.7.9.5
[Qemu-devel] [PATCH 32/32] rng-egd: offset the point when repeatedly read from the buffer
From: Amos Kong ak...@redhat.com The buffer content might be read out more than once, currently we just repeatedly read the first data block, buffer offset is missing. Cc: qemu-sta...@nongnu.org Signed-off-by: Amos Kong ak...@redhat.com Message-id: 1385023371-8198-3-git-send-email-ak...@redhat.com Signed-off-by: Anthony Liguori aligu...@amazon.com (cherry picked from commit 1eb1bd9eafa890f1f4d16ef5cb8b9239a86874d9) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- backends/rng-egd.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backends/rng-egd.c b/backends/rng-egd.c index 9e5a536..2962795 100644 --- a/backends/rng-egd.c +++ b/backends/rng-egd.c @@ -91,12 +91,14 @@ static int rng_egd_chr_can_read(void *opaque) static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size) { RngEgd *s = RNG_EGD(opaque); +size_t buf_offset = 0; while (size 0 s-requests) { RngRequest *req = s-requests-data; int len = MIN(size, req-size - req-offset); -memcpy(req-data + req-offset, buf, len); +memcpy(req-data + req-offset, buf + buf_offset, len); +buf_offset += len; req-offset += len; size -= len; -- 1.7.9.5
[Qemu-devel] [PATCH 12/32] audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second
From: Hans de Goede hdego...@redhat.com Now that we no longer have MIN_REARM_TIMER_NS a bug in the audio subsys has clearly shown it self by trying to make a timer fire every nano second. Note we have a similar problem in 1.6, 1.5 and older but there MIN_REARM_TIMER_NS limits the wakeups caused by audio being active to 4000 times / second. This still causes a host cpu load of 50 % for simply playing audio, where as with this patch git master is at 13%, so we should backport this to 1.5 and 1.6 too. Note this will not apply to 1.5 and 1.6 as is. Cc: qemu-sta...@nongnu.org Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com (cherry picked from commit b4350deed67b95651896ddb60cf9f765093a4848) Conflicts: audio/audio.c *fixed to reflect 1.6 timer function/clock names Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- audio/audio.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/audio/audio.c b/audio/audio.c index 02bb886..bcd41a9 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -1124,7 +1124,8 @@ static int audio_is_timer_needed (void) static void audio_reset_timer (AudioState *s) { if (audio_is_timer_needed ()) { -qemu_mod_timer (s-ts, qemu_get_clock_ns (vm_clock) + 1); +qemu_mod_timer (s-ts, +qemu_get_clock_ns(vm_clock) + conf.period.ticks); } else { qemu_del_timer (s-ts); -- 1.7.9.5
[Qemu-devel] [PATCH 10/32] xen_disk: mark ioreq as mapped before unmapping in error case
From: Matthew Daley mat...@gmail.com Commit 4472beae modified the semantics of ioreq_{un,}map so that they are idempotent if called when they're not needed (ie., twice in a row). However, it neglected to handle the case where batch mapping is not being used (the default), and one of the grants fails to map. In this case, ioreq_unmap will be called to unwind and unmap any mappings already performed, but ioreq_unmap simply returns due to the aforementioned change (the ioreq has not already been marked as mapped). The frontend user can therefore force xen_disk to leak grant mappings, a per-domain limited resource. Fix by marking the ioreq as mapped before calling ioreq_unmap in this situation. Signed-off-by: Matthew Daley mat...@gmail.com Signed-off-by: Stefano Stabellini stefano.stabell...@eu.citrix.com (cherry picked from commit a76f48e53382e6f039db6278443e3ce437653302) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hw/block/xen_disk.c |1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 727f433..03e30d7 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -405,6 +405,7 @@ static int ioreq_map(struct ioreq *ioreq) xen_be_printf(ioreq-blkdev-xendev, 0, can't map grant ref %d (%s, %d maps)\n, refs[i], strerror(errno), ioreq-blkdev-cnt_map); +ioreq-mapped = 1; ioreq_unmap(ioreq); return -1; } -- 1.7.9.5
[Qemu-devel] [PATCH 30/32] qdev-monitor: Unref device when device_add fails
From: Stefan Hajnoczi stefa...@redhat.com qdev_device_add() leaks the created device upon failure. I suspect this problem crept in because qdev_free() unparents the device but does not drop a reference - confusing name. Cc: qemu-sta...@nongnu.org Signed-off-by: Stefan Hajnoczi stefa...@redhat.com Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Andreas Färber afaer...@suse.de (cherry picked from commit ee6abeb6ec08473713848ce9028110f1684853b7) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qdev-monitor.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/qdev-monitor.c b/qdev-monitor.c index bb2e1b6..1b2c606 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -518,6 +518,7 @@ DeviceState *qdev_device_add(QemuOpts *opts) } if (qemu_opt_foreach(opts, set_property, qdev, 1) != 0) { qdev_free(qdev); +object_unref(OBJECT(qdev)); return NULL; } if (qdev-id) { @@ -531,6 +532,7 @@ DeviceState *qdev_device_add(QemuOpts *opts) g_free(name); } if (qdev_init(qdev) 0) { +object_unref(OBJECT(qdev)); qerror_report(QERR_DEVICE_INIT_FAILED, driver); return NULL; } -- 1.7.9.5
[Qemu-devel] [PATCH 31/32] pci: unregister vmstate_pcibus on unplug
From: Bandan Das b...@redhat.com PCIBus registers a vmstate during init. Unregister it upon removal/unplug. Signed-off-by: Bandan Das b...@redhat.com Cc: qemu-sta...@nongnu.org Reviewed-by: Andreas Färber afaer...@suse.de Signed-off-by: Michael S. Tsirkin m...@redhat.com (cherry picked from commit 5c397242d5d53c1adecce31817bb439383cf8228) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hw/pci/pci.c |8 1 file changed, 8 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4c004f5..dc5b788 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -47,6 +47,7 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); static int pcibus_reset(BusState *qbus); +static void pci_bus_finalize(Object *obj); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN(addr, PCIDevice, devfn, -1), @@ -73,6 +74,7 @@ static const TypeInfo pci_bus_info = { .name = TYPE_PCI_BUS, .parent = TYPE_BUS, .instance_size = sizeof(PCIBus), +.instance_finalize = pci_bus_finalize, .class_init = pci_bus_class_init, }; @@ -375,6 +377,12 @@ int pci_bus_num(PCIBus *s) return s-parent_dev-config[PCI_SECONDARY_BUS]; } +static void pci_bus_finalize(Object *obj) +{ +PCIBus *bus = PCI_BUS(obj); +vmstate_unregister(NULL, vmstate_pcibus, bus); +} + static int get_pci_config_device(QEMUFile *f, void *pv, size_t size) { PCIDevice *s = container_of(pv, PCIDevice, config); -- 1.7.9.5
[Qemu-devel] [PATCH 26/32] vfio-pci: Fix multifunction=on
From: Alex Williamson alex.william...@redhat.com When an assigned device is initialized it copies the device config space into the emulated config space. Unfortunately multifunction is setup prior to the device initfn and gets clobbered. We need to restore it just like pci-assign does. Cc: qemu-sta...@nongnu.org Signed-off-by: Alex Williamson alex.william...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com (cherry picked from commit 8d07d6c46597a885eb38d99cc6fff399ce69cd21) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hw/misc/vfio.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 017e693..d9e78e1 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -3142,6 +3142,13 @@ static int vfio_initfn(PCIDevice *pdev) vdev-emulated_config_bits[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_MULTI_FUNCTION; +/* Restore or clear multifunction, this is always controlled by QEMU */ +if (vdev-pdev.cap_present QEMU_PCI_CAP_MULTIFUNCTION) { +vdev-pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; +} else { +vdev-pdev.config[PCI_HEADER_TYPE] = ~PCI_HEADER_TYPE_MULTI_FUNCTION; +} + /* * Clear host resource mapping info. If we choose not to register a * BAR, such as might be the case with the option ROM, we can get -- 1.7.9.5
[Qemu-devel] [PATCH 29/32] qdev-monitor: Fix crash when device_add is called with abstract driver
From: Igor Mammedov imamm...@redhat.com User is able to crash running QEMU when following monitor command is called: device_add intel-hda-generic Crash is caused by assertion in object_initialize_with_type() when type is abstract. Checking if type is abstract before instance is created in qdev_device_add() allows to prevent crash on incorrect user input. Cc: qemu-sta...@nongnu.org Signed-off-by: Igor Mammedov imamm...@redhat.com Signed-off-by: Andreas Färber afaer...@suse.de (cherry picked from commit 2fa4e56d88aa0039062bbc7f9a88e9f90c77ed94) Conflicts: qdev-monitor.c *updated to reflect different 1.6 variable names Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qdev-monitor.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/qdev-monitor.c b/qdev-monitor.c index 410cdcb..bb2e1b6 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -472,6 +472,12 @@ DeviceState *qdev_device_add(QemuOpts *opts) return NULL; } +if (object_class_is_abstract(obj)) { +qerror_report(QERR_INVALID_PARAMETER_VALUE, driver, + non-abstract device type); +return NULL; +} + k = DEVICE_CLASS(obj); /* find bus */ -- 1.7.9.5
[Qemu-devel] [PATCH 28/32] qom: Fix memory leak in object_property_set_link()
From: Vlad Yasevich vyase...@redhat.com Save the result of the call to object_get_canonical_path() so we can free it. Cc: qemu-sta...@nongnu.org Signed-off-by: Vlad Yasevich vyase...@redhat.com Reviewed-by: Amos Kong ak...@redhat.com Reviewed-by: Stefan Hajnoczi stefa...@redhat.com Signed-off-by: Andreas Färber afaer...@suse.de (cherry picked from commit 2d3aa28cc2cf382aa04cd577e0be542175eea9bd) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qom/object.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qom/object.c b/qom/object.c index b2479d1..333f807 100644 --- a/qom/object.c +++ b/qom/object.c @@ -823,8 +823,9 @@ char *object_property_get_str(Object *obj, const char *name, void object_property_set_link(Object *obj, Object *value, const char *name, Error **errp) { -object_property_set_str(obj, object_get_canonical_path(value), -name, errp); +gchar *path = object_get_canonical_path(value); +object_property_set_str(obj, path, name, errp); +g_free(path); } Object *object_property_get_link(Object *obj, const char *name, -- 1.7.9.5
Re: [Qemu-devel] [RFC PATCH v1 0/5] Add error_abort and associated cleanups
On 12/04/2013 02:11 AM, Markus Armbruster wrote: No objection to asserting that the caller passed an error object when the error object is the only way to signal failure. You can't force your callers to check for failure, but the assertion could help prevent accidental misuse. Assertions fire at run-time, though. Unfortunately true. Asserting argument not null first thing in the function should enable a sufficiently smart whole-program static checker to flag null arguments. Coverity is such a checker; I think clang can as well. But having such a static check right at compile-time would be much better. Could attribute nonnull do it? If yes, do we still need the assertion? gcc's implementation of attribute nonnull is complete trash. And the gcc developers know it. The attribute is still useful for Coverity, but at least in libvirt, we have taken to using the attribute ONLY when compiling under a static checker and omitting it under gcc because gcc's implementation of the attribute is so horribly botched. http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17308 So even with attribute nonnull, you still need the assertion. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default
On 12/04/2013 08:21 AM, Eduardo Otubo wrote: On 12/04/2013 07:39 AM, Stefan Hajnoczi wrote: On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote: Developers will only be happy with seccomp if it's easy and rewarding to support/debug. Agreed. As a developer, how do you feel about the audit/syslog based approach I mentioned earlier? I used the commands you posted (I think that's what you mean). They produce useful output. The problem is that without an error message on stderr or from the shell, no one will think QEMU process dead and hung == check seccomp immediately. It's frustrating to deal with a silent failure. The process dies with a SIGKILL, and sig handling in Qemu is hard to implement due to dozen of external linked libraries that has their own signal masks and conflicts with seccomp. I've already tried this approach in the past (you can find in the list by searching for debug mode) And just to be clear, the signal handling approach was only for debug purposes. There are basically three ways to fail a syscall with seccomp: SECCOMP_RET_KILL - kernel kills the task immediately without executing syscall SECCOMP_RET_TRAP - kernel sends SIGSYS to the task without executing syscall SECCOMP_RET_ERRNO - kernel returns an errno to the task wtihout executing syscall You could issue a better error messages if you used TRAP or ERRNO, but giving control back to QEMU after (presumably) arbitrary code is being executed sort of defeats the purpose. -- Regards, Corey Bryant The optimal goal here is to use virt-test and audit log to eliminate these sorts of things.
[Qemu-devel] [PATCH 03/32] char: remove watch callback on chardev detach from frontend
From: Amit Shah amit.s...@redhat.com If a frontend device releases the chardev (via unplug), the chr handlers are set to NULL via qdev's exit callbacks invoking qemu_chr_add_handlers(). If the chardev had a pending operation, a callback will be invoked, which will try to access data in the just-released frontend, causing a segfault. Ensure the callbacks are disabled when frontends release chardevs. This was seen when a virtio-serial port was unplugged when heavy guest-host IO was in progress (causing a callback to be registered). In the window in which the throttling was active, unplugging ports caused a qemu segfault. https://bugzilla.redhat.com/show_bug.cgi?id=985205 CC: qemu-sta...@nongnu.org Reported-by: Sibiao Luo s...@redhat.com Reviewed-by: Gerd Hoffmann kra...@redhat.com Signed-off-by: Amit Shah amit.s...@redhat.com (cherry picked from commit 386a5a1e0057e220f79c48fe3689e3dfb17f1b09) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qemu-char.c |3 +++ 1 file changed, 3 insertions(+) diff --git a/qemu-char.c b/qemu-char.c index fa00517..fc1c23d 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -193,6 +193,8 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) va_end(ap); } +static void remove_fd_in_watch(CharDriverState *chr); + void qemu_chr_add_handlers(CharDriverState *s, IOCanReadHandler *fd_can_read, IOReadHandler *fd_read, @@ -203,6 +205,7 @@ void qemu_chr_add_handlers(CharDriverState *s, if (!opaque !fd_can_read !fd_read !fd_event) { fe_open = 0; +remove_fd_in_watch(s); } else { fe_open = 1; } -- 1.7.9.5
Re: [Qemu-devel] [PATCH V7 0/6] export internal snapshot by qemu-nbd
On Wed, Dec 04, 2013 at 05:10:53PM +0800, Wenchao Xia wrote: This series allow user to read internal snapshot's contents without qemu-img convert. V2: Address Stefan's comments: 02: add 'fall through' comments in the case statement. 03: add doc about the difference of internal snapshot and backing chain snapshot, which is used in previous '--snapshot' parameter. Other: 01,04: rebased on upstream with conflict resolved. v3: Address Paolo's comments: 02: add parameter -l snapshot_id_or_name, rename options snapshot-load to load-snapshot, use QemuOpts. 03: rename snapshot-load to load-snapshot. 04: related change to test both -l and -L case. 05-07: add similar parameter for qemu-img convert. Other: 01: foldered original snapshot logic into function bdrv_snapshot_load_tmp_by_id_or_name(), since multiple caller present in this version. Refined error message from , reason: %s to : %s. 02: Refined error message from , reason: %s to : %s. 03: Rename PARAM to SNAPSHOT_PARAM. v4: Address Eric's comments: 01: typo fix. 02: squashed patch. Use only one option -l and distiguish the cases by the starting string. 03: remove 'eval', remove '_supported_os Linux', remove the comments that have typo. 04: squashed patch. Add only one option -l and distiguish the cases by the starting string. 05: remove indentation. Address Paolo's comments: 02: Use only one option -l and distiguish the cases by the starting string. 04: Use only one option -l and distiguish the cases by the starting string, mark old -s as deprecated, added doc for them. v5: Address Jeff's comments: Quote image name in test cases. v6: Address Kevin's comments: 1: typo fix, remove device and snapshot info in error message. 2: use strstart(). 3: use _require_command(), limit proto to file, since when proto=nbd it can't work. also changed _require_command() to tip better. 4: use strstart(). 6: new patch, since I found the doc missing in debugging. v7: Address Stefan's comments: 3: use unix socket and wait for 30s, to make the case reliable, folder 4: add missing doc change. Other: 3,5: add label method 1/method 2 in echo message, to tip better. Wenchao Xia (6): 1 snapshot: distinguish id and name in load_tmp 2 qemu-nbd: support internal snapshot export 3 qemu-iotests: add 058 internal snapshot export with qemu-nbd case 4 qemu-img: add -l for snapshot in convert 5 qemu-iotests: add test for snapshot in qemu-img convert 6 qemu-nbd: add doc for option -f block/qcow2-snapshot.c | 10 +++- block/qcow2.h|5 +- block/snapshot.c | 77 ++- include/block/block_int.h|4 +- include/block/snapshot.h | 15 - qemu-img-cmds.hx |4 +- qemu-img.c | 44 +++-- qemu-img.texi| 12 +++- qemu-nbd.c | 47 ++- qemu-nbd.texi| 10 +++- tests/qemu-iotests/058 | 138 ++ tests/qemu-iotests/058.out | 44 + tests/qemu-iotests/check |1 + tests/qemu-iotests/common.rc |3 +- tests/qemu-iotests/group |1 + 15 files changed, 390 insertions(+), 25 deletions(-) create mode 100755 tests/qemu-iotests/058 create mode 100644 tests/qemu-iotests/058.out Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
[Qemu-devel] [PATCH 13/32] monitor: eliminate monitor_event_state_lock
From: Paolo Bonzini pbonz...@redhat.com This lock does not protect anything that the BQL does not already protect. Furthermore, with -nodefaults and no monitor, the mutex is not initialized but monitor_protocol_event_queue is called anyway, which causes a crash under mingw (and only works by luck. under Linux or other POSIX OSes). Reported-by: Orx Goshen orx.gos...@intel.com Cc: Daniel Berrange berra...@redhat.com Cc: qemu-sta...@nongnu.org Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Luiz Capitulino lcapitul...@redhat.com (cherry picked from commit c20b7fa4b2fedd979bcb0cc974bb5d08a10e3448) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- monitor.c |6 -- 1 file changed, 6 deletions(-) diff --git a/monitor.c b/monitor.c index 5dc0aa9..99bfcd9 100644 --- a/monitor.c +++ b/monitor.c @@ -508,7 +508,6 @@ static const char *monitor_event_names[] = { QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) MonitorEventState monitor_event_state[QEVENT_MAX]; -QemuMutex monitor_event_state_lock; /* * Emits the event to every monitor instance @@ -540,7 +539,6 @@ monitor_protocol_event_queue(MonitorEvent event, int64_t now = qemu_get_clock_ns(rt_clock); assert(event QEVENT_MAX); -qemu_mutex_lock(monitor_event_state_lock); evstate = (monitor_event_state[event]); trace_monitor_protocol_event_queue(event, data, @@ -573,7 +571,6 @@ monitor_protocol_event_queue(MonitorEvent event, evstate-last = now; } } -qemu_mutex_unlock(monitor_event_state_lock); } @@ -586,7 +583,6 @@ static void monitor_protocol_event_handler(void *opaque) MonitorEventState *evstate = opaque; int64_t now = qemu_get_clock_ns(rt_clock); -qemu_mutex_lock(monitor_event_state_lock); trace_monitor_protocol_event_handler(evstate-event, evstate-data, @@ -598,7 +594,6 @@ static void monitor_protocol_event_handler(void *opaque) evstate-data = NULL; } evstate-last = now; -qemu_mutex_unlock(monitor_event_state_lock); } @@ -635,7 +630,6 @@ monitor_protocol_event_throttle(MonitorEvent event, * and initialize state */ static void monitor_protocol_event_init(void) { -qemu_mutex_init(monitor_event_state_lock); /* Limit RTC BALLOON events to 1 per second */ monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000); monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000); -- 1.7.9.5
[Qemu-devel] [PATCH 27/32] virtio-net: fix the memory leak in rxfilter_notify()
From: Amos Kong ak...@redhat.com object_get_canonical_path() returns a gchar*, it should be freed by the caller. Signed-off-by: Amos Kong ak...@redhat.com Reviewed-by: Michael S. Tsirkin m...@redhat.com Reviewed-by: Vlad Yasevich vyase...@redhat.com Reviewed-by: Andreas Färber afaer...@suse.de Signed-off-by: Stefan Hajnoczi stefa...@redhat.com (cherry picked from commit 96e35046e4a97df5b4e1e24e217eb1e1701c7c71) Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hw/net/virtio-net.c |8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index bb757b3..5320aab 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -200,16 +200,16 @@ static void rxfilter_notify(NetClientState *nc) VirtIONet *n = qemu_get_nic_opaque(nc); if (nc-rxfilter_notify_enabled) { +gchar *path = object_get_canonical_path(OBJECT(n-qdev)); if (n-netclient_name) { event_data = qobject_from_jsonf({ 'name': %s, 'path': %s }, -n-netclient_name, - object_get_canonical_path(OBJECT(n-qdev))); +n-netclient_name, path); } else { -event_data = qobject_from_jsonf({ 'path': %s }, - object_get_canonical_path(OBJECT(n-qdev))); +event_data = qobject_from_jsonf({ 'path': %s }, path); } monitor_protocol_event(QEVENT_NIC_RX_FILTER_CHANGED, event_data); qobject_decref(event_data); +g_free(path); /* disable event notification to avoid events flooding */ nc-rxfilter_notify_enabled = 0; -- 1.7.9.5