date:20131204

[Qemu-devel] [PATCH V17 01/11] NUMA: move numa related code to new file numa.c

2013-12-04 Thread Wanlong Gao

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 Makefile.target |   2 +-
 cpus.c  |  14 
 include/sysemu/cpus.h   |   1 -
 include/sysemu/sysemu.h |   3 +
 numa.c  | 182 
 vl.c| 139 +---
 6 files changed, 187 insertions(+), 154 deletions(-)
 create mode 100644 numa.c

diff --git a/Makefile.target b/Makefile.target
index af6ac7e..0197c17 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -109,7 +109,7 @@ endif #CONFIG_BSD_USER
 #
 # System emulator target
 ifdef CONFIG_SOFTMMU
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o
 obj-y += hw/
 obj-$(CONFIG_FDT) += device_tree.o
diff --git a/cpus.c b/cpus.c
index 01d128d..53360b0 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1297,20 +1297,6 @@ static void tcg_exec_all(void)
 exit_request = 0;
 }
 
-void set_numa_modes(void)
-{
-CPUState *cpu;
-int i;
-
-CPU_FOREACH(cpu) {
-for (i = 0; i  nb_numa_nodes; i++) {
-if (test_bit(cpu-cpu_index, node_cpumask[i])) {
-cpu-numa_node = i;
-}
-}
-}
-}
-
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 {
 /* XXX: implement xxx_cpu_list for targets that still miss it */
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 6502488..4f79081 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -23,7 +23,6 @@ extern int smp_threads;
 #define smp_threads 1
 #endif
 
-void set_numa_modes(void);
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg);
 
 #endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 495dae8..2509649 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -136,6 +136,9 @@ extern QEMUClockType rtc_clock;
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
 extern unsigned long *node_cpumask[MAX_NODES];
+void numa_add(const char *optarg);
+void set_numa_nodes(void);
+void set_numa_modes(void);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
new file mode 100644
index 000..ce7736a
--- /dev/null
+++ b/numa.c
@@ -0,0 +1,182 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2013 Fujitsu Ltd.
+ * Author: Wanlong Gao gaowanl...@cn.fujitsu.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include sysemu/sysemu.h
+
+static void numa_node_parse_cpus(int nodenr, const char *cpus)
+{
+char *endptr;
+unsigned long long value, endvalue;
+
+/* Empty CPU range strings will be considered valid, they will simply
+ * not set any bit in the CPU bitmap.
+ */
+if (!*cpus) {
+return;
+}
+
+if (parse_uint(cpus, value, endptr, 10)  0) {
+goto error;
+}
+if (*endptr == '-') {
+if (parse_uint_full(endptr + 1, endvalue, 10)  0) {
+goto error;
+}
+} else if (*endptr == '\0') {
+endvalue = value;
+} else {
+goto error;
+}
+
+if (endvalue = MAX_CPUMASK_BITS) {
+endvalue = MAX_CPUMASK_BITS - 1;
+fprintf(stderr,
+qemu: NUMA: A max of %d VCPUs are supported\n,
+ MAX_CPUMASK_BITS);
+}
+
+if (endvalue  value) {
+goto error;
+}
+
+bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
+return;
+
+error:
+fprintf(stderr, qemu: Invalid NUMA CPU range: %s\n, cpus);
+exit(1);
+}
+
+void numa_add(const char *optarg)
+{
+char option[128];
+char *endptr;
+unsigned long long nodenr;
+
+optarg = get_opt_name(option, 128, optarg, ',');
+if (*optarg == ',') {
+optarg++;
+}
+if (!strcmp(option, node)) {
+
+if (nb_numa_nodes =

[Qemu-devel] [PATCH V17 00/11] Add support for binding guest numa nodes to host numa nodes

2013-12-04 Thread Wanlong Gao

As you know, QEMU can't direct it's memory allocation now, this may cause
guest cross node access performance regression.
And, the worse thing is that if PCI-passthrough is used,
direct-attached-device uses DMA transfer between device and qemu process.
All pages of the guest will be pinned by get_user_pages().

KVM_ASSIGN_PCI_DEVICE ioctl
  kvm_vm_ioctl_assign_device()
=kvm_assign_device()
  = kvm_iommu_map_memslots()
= kvm_iommu_map_pages()
   = kvm_pin_pages()

So, with direct-attached-device, all guest page's page count will be +1 and
any page migration will not work. AutoNUMA won't too.

So, we should set the guest nodes memory allocation policy before
the pages are really mapped.

According to this patch set, we are able to set guest nodes memory policy
like following:

 -numa node,nodeid=0,cpus=0, \
 -numa mem,size=1024M,policy=membind,host-nodes=0-1 \
 -numa node,nodeid=1,cpus=1 \
 -numa mem,size=1024M,policy=interleave,host-nodes=1

This supports 
policy={default|membind|interleave|preferred},relative=true,host-nodes=N-N 
like format.

And add a QMP command query-numa to show numa info through
this API.

And convert the info numa monitor command to use this
QMP command query-numa.

This version removes set-mem-policy qmp and hmp commands temporarily
as Marcelo and Paolo suggested.


The simple test is like following:
=
Before:
# numactl -H  /qemu/x86_64-softmmu/qemu-system-x86_64 -m 4096  -smp 2 -numa 
node,nodeid=0,cpus=0,mem=2048 -numa node,nodeid=1,cpus=1,mem=2048 -hda 
6u4ga2.qcow2 -enable-kvm -device 
pci-assign,host=07:00.1,id=hostdev0,bus=pci.0,addr=0x7  sleep 40  numactl -H
[1] 13320
available: 2 nodes (0-1)
node 0 cpus: 0 2
node 0 size: 5111 MB
node 0 free: 4653 MB
node 1 cpus: 1 3
node 1 size: 5120 MB
node 1 free: 4764 MB
node distances:
node   0   1 
  0:  10  20 
  1:  20  10 
available: 2 nodes (0-1)
node 0 cpus: 0 2
node 0 size: 5111 MB
node 0 free: 4317 MB
node 1 cpus: 1 3
node 1 size: 5120 MB
node 1 free: 876 MB
node distances:
node   0   1 
  0:  10  20 
  1:  20  10 



After:
# numactl -H  /qemu/x86_64-softmmu/qemu-system-x86_64 -m 4096 -smp 4 -numa 
node,nodeid=0,cpus=0,cpus=2 -numa mem,size=2048M,policy=membind,host-nodes=0 
-numa node,nodeid=0,cpus=1,cpus=3 -numa 
mem,size=2048M,policy=membind,host-nodes=1 -hda 6u4ga2.qcow2 -enable-kvm 
-device pci-assign,host=07:00.1,id=hostdev0,bus=pci.0,addr=0x7  sleep 40  
numactl -H
[1] 10862
available: 2 nodes (0-1)
node 0 cpus: 0 2
node 0 size: 5111 MB
node 0 free: 4718 MB
node 1 cpus: 1 3
node 1 size: 5120 MB
node 1 free: 4799 MB
node distances:
node   0   1 
  0:  10  20 
  1:  20  10 
available: 2 nodes (0-1)
node 0 cpus: 0 2
node 0 size: 5111 MB
node 0 free: 2544 MB
node 1 cpus: 1 3
node 1 size: 5120 MB
node 1 free: 2725 MB
node distances:
node   0   1 
  0:  10  20 
  1:  20  10 
===


V1-V2:
change to use QemuOpts in numa options (Paolo)
handle Error in mpol parser (Paolo)
change qmp command format to mem-policy=membind,mem-hostnode=0-1 like 
(Paolo)
V2-V3:
also handle Error in cpus parser (5/10)
split out common parser from cpus and hostnode parser (Bandan 6/10)
V3-V4:
rebase to request for comments
V4-V5:
use OptVisitor and split -numa option (Paolo)
 - s/set-mpol/set-mem-policy (Andreas)
 - s/mem-policy/policy
 - s/mem-hostnode/host-nodes
fix hmp command process after error (Luiz)
add qmp command query-numa and convert info numa to it (Luiz)
V5-V6:
remove tabs in json file (Laszlo, Paolo)
add back -numa node,mem=xxx as legacy (Paolo)
change cpus and host-nodes to array (Laszlo, Eric)
change nodeid to uint16
add NumaMemPolicy enum type (Eric)
rebased on Laszlo's OptsVisitor: support / flatten integer ranges for 
repeating options patch set, thanks for Laszlo's help
V6-V7:
change UInt16 to uint16 (Laszlo)
fix a typo in adding qmp command set-mem-policy
V7-V8:
rebase to current master with Laszlo's V2 of OptsVisitor patch set
fix an adding white space line error
V8-V9:
rebase to current master
check if total numa memory size is equal to ram_size (Paolo)
add comments to the OptsVisitor stuff in qapi-schema.json (Eric, Laszlo)
replace the use of numa_num_configured_nodes() (Andrew)
avoid abusing the fact i==nodeid (Andrew)
V9-V10:
rebase to current master
remove libnuma (Andrew)
MAX_NODES=64 - MAX_NODES=128 since libnuma selected 128 (Andrew)
use MAX_NODES instead of MAX_CPUMASK_BITS for host_mem bitmap (Andrew)
remove a useless clear_bit() operation (Andrew)
V10-V11:
rebase to current master
fix maxnode argument of mbind(2)
V11-V12:
rebase to current master
split patch 02/11 of V11 (Eduardo)
add some max value check (Eduardo)
split MAX_NODES change patch (Eduardo)
V12-V13:
rebase to current master
thanks for Luiz's review (Luiz)
doc hmp command

[Qemu-devel] [PATCH V17 02/11] NUMA: check if the total numa memory size is equal to ram_size

2013-12-04 Thread Wanlong Gao

If the total number of the assigned numa nodes memory is not
equal to the assigned ram size, it will write the wrong data
to ACPI talb, then the guest will ignore the wrong ACPI table
and recognize all memory to one node. It's buggy, we should
check it to ensure that we write the right data to ACPI table.

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 numa.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/numa.c b/numa.c
index ce7736a..beda80e 100644
--- a/numa.c
+++ b/numa.c
@@ -150,6 +150,16 @@ void set_numa_nodes(void)
 node_mem[i] = ram_size - usedmem;
 }
 
+uint64_t numa_total = 0;
+for (i = 0; i  nb_numa_nodes; i++) {
+numa_total += node_mem[i];
+}
+if (numa_total != ram_size) {
+fprintf(stderr, qemu: numa nodes total memory size 
+should equal to ram_size\n);
+exit(1);
+}
+
 for (i = 0; i  nb_numa_nodes; i++) {
 if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
 break;
-- 
1.8.5

[Qemu-devel] [PATCH V17 10/11] NUMA: add qmp command query-numa

2013-12-04 Thread Wanlong Gao

Add qmp command query-numa to show guest NUMA information.

Reviewed-by: Luiz Capitulino lcapitul...@redhat.com
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 numa.c   | 66 
 qapi-schema.json | 36 +++
 qmp-commands.hx  | 49 +
 3 files changed, 151 insertions(+)

diff --git a/numa.c b/numa.c
index 43bba42..2954709 100644
--- a/numa.c
+++ b/numa.c
@@ -28,6 +28,7 @@
 #include qapi/opts-visitor.h
 #include qapi/dealloc-visitor.h
 #include exec/memory.h
+#include qmp-commands.h
 
 #ifdef __linux__
 #include sys/syscall.h
@@ -340,3 +341,68 @@ void set_numa_modes(void)
 }
 }
 }
+
+NUMANodeList *qmp_query_numa(Error **errp)
+{
+NUMANodeList *head = NULL, *cur_item = NULL;
+CPUState *cpu;
+int i;
+
+for (i = 0; i  nb_numa_nodes; i++) {
+NUMANodeList *info;
+uint16List *cur_cpu_item = NULL;
+info = g_malloc0(sizeof(*info));
+info-value = g_malloc0(sizeof(*info-value));
+info-value-nodeid = i;
+CPU_FOREACH(cpu) {
+if (cpu-numa_node == i) {
+uint16List *node_cpu = g_malloc0(sizeof(*node_cpu));
+node_cpu-value = cpu-cpu_index;
+
+if (!cur_cpu_item) {
+info-value-cpus = cur_cpu_item = node_cpu;
+} else {
+cur_cpu_item-next = node_cpu;
+cur_cpu_item = node_cpu;
+}
+}
+}
+info-value-memory = numa_info[i].node_mem;
+
+#ifdef __linux__
+info-value-policy = numa_info[i].policy;
+info-value-relative = numa_info[i].relative;
+
+unsigned long first, next;
+next = first = find_first_bit(numa_info[i].host_mem, MAX_NODES);
+if (first == MAX_NODES) {
+goto end;
+}
+uint16List *cur_node_item = g_malloc0(sizeof(*cur_node_item));
+cur_node_item-value = first;
+info-value-host_nodes = cur_node_item;
+do {
+next = find_next_bit(numa_info[i].host_mem, MAX_NODES,
+ next + 1);
+if (next == MAX_NODES) {
+break;
+}
+
+uint16List *host_node = g_malloc0(sizeof(*host_node));
+host_node-value = next;
+cur_node_item-next = host_node;
+cur_node_item = host_node;
+} while (true);
+end:
+#endif
+
+if (!cur_item) {
+head = cur_item = info;
+} else {
+cur_item-next = info;
+cur_item = info;
+}
+}
+
+return head;
+}
diff --git a/qapi-schema.json b/qapi-schema.json
index c0dad81..af947e2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4289,3 +4289,39 @@
'*policy': 'NumaNodePolicy',
'*relative':   'bool',
'*host-nodes': ['uint16'] }}
+
+##
+# @NUMANode:
+#
+# Information of guest NUMA node
+#
+# @nodeid: NUMA node ID
+#
+# @cpus: VCPUs contained in this node
+#
+# @memory: memory size of this node
+#
+# @policy: memory policy of this node
+#
+# @relative: if host nodes are relative for memory policy
+#
+# @host-nodes: host nodes for its memory policy
+#
+# Since: 1.7
+#
+##
+{ 'type': 'NUMANode',
+  'data': {'nodeid': 'uint16', 'cpus': ['uint16'], 'memory': 'uint64',
+   'policy': 'NumaNodePolicy', 'relative': 'bool',
+   'host-nodes': ['uint16'] }}
+
+##
+# @query-numa:
+#
+# Returns a list of information about each guest node.
+#
+# Returns: a list of @NUMANode for each guest node
+#
+# Since: 1.7
+##
+{ 'command': 'query-numa', 'returns': ['NUMANode'] }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index fba15cd..c2bc508 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3295,3 +3295,52 @@ Example (2):
 - { return: {} }
 
 EQMP
+
+{
+.name = query-numa,
+.args_type = ,
+.mhandler.cmd_new = qmp_marshal_input_query_numa,
+},
+
+SQMP
+query-numa
+-
+
+Show NUMA information.
+
+Return a json-array. Each NUMA node is represented by a json-object,
+which contains:
+
+- nodeid: NUMA node ID (json-int)
+- cpus: a json-arry of contained VCPUs
+- memory: amount of memory in each node in Byte (json-int)
+- policy: memory policy of this node (json-string)
+- relative: if host nodes is relative for its memory policy (json-bool)
+- host-nodes: a json-array of host nodes for its memory policy
+
+Arguments:
+
+Example:
+
+- { excute: query-numa }
+- { return:[
+{
+nodeid: 0,
+cpus: [0, 1],
+memory: 536870912,
+policy: membind,
+relative: false,
+host-nodes: [0, 1]
+},
+{
+nodeid: 1,
+cpus: [2, 3],
+memory: 536870912,
+policy: interleave,
+relative: false,
+host-nodes: [1]
+}
+ ]
+   }
+
+EQMP
-- 
1.8.5

[Qemu-devel] [PATCH V17 09/11] NUMA: set guest numa nodes memory policy

2013-12-04 Thread Wanlong Gao

Set the guest numa nodes memory policies using the mbind(2)
system call node by node.
After this patch, we are able to set guest nodes memory policies
through the QEMU options, this arms to solve the guest cross
nodes memory access performance issue.
And as you all know, if PCI-passthrough is used,
direct-attached-device uses DMA transfer between device and qemu process.
All pages of the guest will be pinned by get_user_pages().

KVM_ASSIGN_PCI_DEVICE ioctl
  kvm_vm_ioctl_assign_device()
=kvm_assign_device()
  = kvm_iommu_map_memslots()
= kvm_iommu_map_pages()
   = kvm_pin_pages()

So, with direct-attached-device, all guest page's page count will be +1 and
any page migration will not work. AutoNUMA won't too.

So, we should set the guest nodes memory allocation policies before
the pages are really mapped.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 hw/i386/pc.c  |  9 +
 include/exec/memory.h | 15 
 numa.c| 99 +++
 3 files changed, 123 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 74c1f16..07553f2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1178,6 +1178,10 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 memory_region_init_alias(ram_below_4g, NULL, ram-below-4g, ram,
  0, below_4g_mem_size);
 memory_region_add_subregion(system_memory, 0, ram_below_4g);
+if (memory_region_set_mem_policy(ram_below_4g, 0, below_4g_mem_size, 0)) {
+fprintf(stderr, qemu: set below 4g memory policy failed\n);
+exit(1);
+}
 e820_add_entry(0, below_4g_mem_size, E820_RAM);
 if (above_4g_mem_size  0) {
 ram_above_4g = g_malloc(sizeof(*ram_above_4g));
@@ -1185,6 +1189,11 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
  below_4g_mem_size, above_4g_mem_size);
 memory_region_add_subregion(system_memory, 0x1ULL,
 ram_above_4g);
+if (memory_region_set_mem_policy(ram_above_4g, 0, above_4g_mem_size,
+ below_4g_mem_size)) {
+fprintf(stderr, qemu: set above 4g memory policy failed\n);
+exit(1);
+}
 e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM);
 }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf..33de50a 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -905,6 +905,21 @@ void memory_region_transaction_begin(void);
 void memory_region_transaction_commit(void);
 
 /**
+ * memory_region_set_mem_policy: Set memory policy
+ *
+ * Set the memory policy for the specified area.
+ *
+ * @mr: a MemoryRegion we are setting memory policy for
+ * @start: the start offset of the specific region in this MemoryRegion
+ * @length: the specific memory area length
+ * @offset: the start offset of the specific area in NUMA setting
+ */
+int memory_region_set_mem_policy(MemoryRegion *mr,
+ ram_addr_t start,
+ ram_addr_t length,
+ ram_addr_t offset);
+
+/**
  * memory_listener_register: register callbacks to be called when memory
  *   sections are mapped or unmapped into an address
  *   space
diff --git a/numa.c b/numa.c
index da4dbbd..43bba42 100644
--- a/numa.c
+++ b/numa.c
@@ -27,6 +27,16 @@
 #include qapi-visit.h
 #include qapi/opts-visitor.h
 #include qapi/dealloc-visitor.h
+#include exec/memory.h
+
+#ifdef __linux__
+#include sys/syscall.h
+#ifndef MPOL_F_RELATIVE_NODES
+#define MPOL_F_RELATIVE_NODES (1  14)
+#define MPOL_F_STATIC_NODES   (1  15)
+#endif
+#endif
+
 QemuOptsList qemu_numa_opts = {
 .name = numa,
 .implied_opt_name = type,
@@ -228,6 +238,95 @@ void set_numa_nodes(void)
 }
 }
 
+#ifdef __linux__
+static int node_parse_bind_mode(unsigned int nodeid)
+{
+int bind_mode;
+
+switch (numa_info[nodeid].policy) {
+case NUMA_NODE_POLICY_DEFAULT:
+case NUMA_NODE_POLICY_PREFERRED:
+case NUMA_NODE_POLICY_MEMBIND:
+case NUMA_NODE_POLICY_INTERLEAVE:
+bind_mode = numa_info[nodeid].policy;
+break;
+default:
+bind_mode = NUMA_NODE_POLICY_DEFAULT;
+return bind_mode;
+}
+
+bind_mode |= numa_info[nodeid].relative ?
+MPOL_F_RELATIVE_NODES : MPOL_F_STATIC_NODES;
+
+return bind_mode;
+}
+
+static int node_set_mem_policy(void *ram_ptr, ram_addr_t length, int nodeid)
+{
+int bind_mode = node_parse_bind_mode(nodeid);
+unsigned long *nodes = numa_info[nodeid].host_mem;
+
+/* This is a workaround for a long standing bug in Linux'
+ * mbind implementation, which cuts off the last specified
+ * node. To stay compatible should this bug be fixed, we
+ * specify one more node and zero this one out.
+

[Qemu-devel] [PATCH V17 06/11] NUMA: add -numa mem, options

2013-12-04 Thread Wanlong Gao

Add -numa mem, option like following as Paolo suggested:

-numa mem,nodeid=0,size=1G

This new option will make later coming memory hotplug better.

We will use the new options to specify nodes memory info,
and just remain -numa node,mem=xx as legacy.

Reviewed-by: Laszlo Ersek ler...@redhat.com
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 include/sysemu/sysemu.h |  1 +
 numa.c  | 36 
 qemu-options.hx |  6 --
 vl.c|  2 ++
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 20b05a3..291aa6a 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -135,6 +135,7 @@ extern QEMUClockType rtc_clock;
 #define MAX_NODES 64
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
+extern int nb_numa_mem_nodes;
 typedef struct node_info {
 uint64_t node_mem;
 DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
diff --git a/numa.c b/numa.c
index c4fa665..c676c5e 100644
--- a/numa.c
+++ b/numa.c
@@ -74,6 +74,31 @@ static int numa_node_parse(NumaNodeOptions *opts)
 return 0;
 }
 
+static int numa_mem_parse(NumaMemOptions *opts)
+{
+uint16_t nodenr;
+uint64_t mem_size;
+
+if (opts-has_nodeid) {
+nodenr = opts-nodeid;
+} else {
+nodenr = nb_numa_mem_nodes;
+}
+
+if (nodenr = MAX_NODES) {
+fprintf(stderr, qemu: Max number of NUMA nodes reached: %
+PRIu16 \n, nodenr);
+return -1;
+}
+
+if (opts-has_size) {
+mem_size = opts-size;
+numa_info[nodenr].node_mem = mem_size;
+}
+
+return 0;
+}
+
 int numa_init_func(QemuOpts *opts, void *opaque)
 {
 NumaOptions *object = NULL;
@@ -101,6 +126,13 @@ int numa_init_func(QemuOpts *opts, void *opaque)
 }
 nb_numa_nodes++;
 break;
+case NUMA_OPTIONS_KIND_MEM:
+ret = numa_mem_parse(object-mem);
+if (ret) {
+goto error;
+}
+nb_numa_mem_nodes++;
+break;
 default:
 fprintf(stderr, qemu: Invalid NUMA options type.\n);
 ret = -1;
@@ -119,6 +151,10 @@ error:
 
 void set_numa_nodes(void)
 {
+if (nb_numa_mem_nodes  nb_numa_nodes) {
+nb_numa_nodes = nb_numa_mem_nodes;
+}
+
 if (nb_numa_nodes  0) {
 int i;
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 8b94264..e6afb6f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,11 +95,13 @@ specifies the maximum number of hotpluggable CPUs.
 ETEXI
 
 DEF(numa, HAS_ARG, QEMU_OPTION_numa,
--numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n, QEMU_ARCH_ALL)
+-numa node[,nodeid=node][,cpus=cpu[-cpu]]\n
+-numa mem[,nodeid=node][,size=size]\n
+, QEMU_ARCH_ALL)
 STEXI
 @item -numa @var{opts}
 @findex -numa
-Simulate a multi node NUMA system. If mem and cpus are omitted, resources
+Simulate a multi node NUMA system. If @var{size} and @var{cpus} are omitted, 
resources
 are split equally.
 ETEXI
 
diff --git a/vl.c b/vl.c
index e67f34a..064b821 100644
--- a/vl.c
+++ b/vl.c
@@ -250,6 +250,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
 QTAILQ_HEAD_INITIALIZER(fw_boot_order);
 
 int nb_numa_nodes;
+int nb_numa_mem_nodes;
 NodeInfo numa_info[MAX_NODES];
 
 uint8_t qemu_uuid[16];
@@ -2817,6 +2818,7 @@ int main(int argc, char **argv, char **envp)
 }
 
 nb_numa_nodes = 0;
+nb_numa_mem_nodes = 0;
 nb_nics = 0;
 
 bdrv_init_with_whitelist();
-- 
1.8.5

[Qemu-devel] [PATCH V17 07/11] NUMA: expand MAX_NODES from 64 to 128

2013-12-04 Thread Wanlong Gao

libnuma choosed 128 for MAX_NODES, so we follow libnuma here.

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 include/sysemu/sysemu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 291aa6a..807619e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -132,7 +132,7 @@ extern size_t boot_splash_filedata_size;
 extern uint8_t qemu_extra_params_fw[2];
 extern QEMUClockType rtc_clock;
 
-#define MAX_NODES 64
+#define MAX_NODES 128
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
 extern int nb_numa_mem_nodes;
-- 
1.8.5

[Qemu-devel] [PATCH V17 04/11] NUMA: convert -numa option to use OptsVisitor

2013-12-04 Thread Wanlong Gao

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 include/sysemu/sysemu.h |   3 +-
 numa.c  | 148 +++-
 qapi-schema.json|  30 ++
 vl.c|  11 +++-
 4 files changed, 114 insertions(+), 78 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index d873b42..20b05a3 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -140,9 +140,10 @@ typedef struct node_info {
 DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
-void numa_add(const char *optarg);
 void set_numa_nodes(void);
 void set_numa_modes(void);
+extern QemuOptsList qemu_numa_opts;
+int numa_init_func(QemuOpts *opts, void *opaque);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
index 1bc0fad..c4fa665 100644
--- a/numa.c
+++ b/numa.c
@@ -24,101 +24,97 @@
  */
 
 #include sysemu/sysemu.h
-
-static void numa_node_parse_cpus(int nodenr, const char *cpus)
+#include qapi-visit.h
+#include qapi/opts-visitor.h
+#include qapi/dealloc-visitor.h
+QemuOptsList qemu_numa_opts = {
+.name = numa,
+.implied_opt_name = type,
+.head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+.desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+static int numa_node_parse(NumaNodeOptions *opts)
 {
-char *endptr;
-unsigned long long value, endvalue;
-
-/* Empty CPU range strings will be considered valid, they will simply
- * not set any bit in the CPU bitmap.
- */
-if (!*cpus) {
-return;
-}
+uint16_t nodenr;
+uint16List *cpus = NULL;
 
-if (parse_uint(cpus, value, endptr, 10)  0) {
-goto error;
-}
-if (*endptr == '-') {
-if (parse_uint_full(endptr + 1, endvalue, 10)  0) {
-goto error;
-}
-} else if (*endptr == '\0') {
-endvalue = value;
+if (opts-has_nodeid) {
+nodenr = opts-nodeid;
 } else {
-goto error;
+nodenr = nb_numa_nodes;
 }
 
-if (endvalue = MAX_CPUMASK_BITS) {
-endvalue = MAX_CPUMASK_BITS - 1;
-fprintf(stderr,
-qemu: NUMA: A max of %d VCPUs are supported\n,
- MAX_CPUMASK_BITS);
+if (nodenr = MAX_NODES) {
+fprintf(stderr, qemu: Max number of NUMA nodes reached: %
+PRIu16 \n, nodenr);
+return -1;
 }
 
-if (endvalue  value) {
-goto error;
+for (cpus = opts-cpus; cpus; cpus = cpus-next) {
+if (cpus-value  MAX_CPUMASK_BITS) {
+fprintf(stderr, qemu: cpu number % PRIu16  is bigger than %d,
+cpus-value, MAX_CPUMASK_BITS);
+continue;
+}
+bitmap_set(numa_info[nodenr].node_cpu, cpus-value, 1);
 }
 
-bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
-return;
+if (opts-has_mem) {
+int64_t mem_size;
+char *endptr;
+mem_size = strtosz(opts-mem, endptr);
+if (mem_size  0 || *endptr) {
+fprintf(stderr, qemu: invalid numa mem size: %s\n, opts-mem);
+return -1;
+}
+numa_info[nodenr].node_mem = mem_size;
+}
 
-error:
-fprintf(stderr, qemu: Invalid NUMA CPU range: %s\n, cpus);
-exit(1);
+return 0;
 }
 
-void numa_add(const char *optarg)
+int numa_init_func(QemuOpts *opts, void *opaque)
 {
-char option[128];
-char *endptr;
-unsigned long long nodenr;
-
-optarg = get_opt_name(option, 128, optarg, ',');
-if (*optarg == ',') {
-optarg++;
+NumaOptions *object = NULL;
+Error *err = NULL;
+int ret = 0;
+
+{
+OptsVisitor *ov = opts_visitor_new(opts);
+visit_type_NumaOptions(opts_get_visitor(ov), object, NULL, err);
+opts_visitor_cleanup(ov);
 }
-if (!strcmp(option, node)) {
-
-if (nb_numa_nodes = MAX_NODES) {
-fprintf(stderr, qemu: too many NUMA nodes\n);
-exit(1);
-}
 
-if (get_param_value(option, 128, nodeid, optarg) == 0) {
-nodenr = nb_numa_nodes;
-} else {
-if (parse_uint_full(option, nodenr, 10)  0) {
-fprintf(stderr, qemu: Invalid NUMA nodeid: %s\n, option);
-exit(1);
-}
-}
-
-if (nodenr = MAX_NODES) {
-fprintf(stderr, qemu: invalid NUMA nodeid: %llu\n, nodenr);
-exit(1);
-}
+if (error_is_set(err)) {
+fprintf(stderr, qemu: %s\n, error_get_pretty(err));
+error_free(err);
+ret = -1;
+goto error;
+}
 
-if (get_param_value(option, 128, mem, optarg) == 0) {
-numa_info[nodenr].node_mem = 0;
-} else {
-int64_t sval;
-sval = strtosz(option, endptr);
-if (sval  0 || *endptr) {
-fprintf(stderr, qemu: invalid numa mem size: %s\n, optarg);
-exit(1);
-

[Qemu-devel] [PATCH V17 03/11] NUMA: Add numa_info structure to contain numa nodes info

2013-12-04 Thread Wanlong Gao

Add the numa_info structure to contain the numa nodes memory,
VCPUs information and the future added numa nodes host memory
policies.

Reviewed-by: Eduardo Habkost ehabk...@redhat.com
Signed-off-by: Andre Przywara andre.przyw...@amd.com
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 hw/i386/pc.c| 12 
 include/sysemu/sysemu.h |  8 ++--
 monitor.c   |  2 +-
 numa.c  | 23 ---
 vl.c|  7 +++
 5 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 12c436e..74c1f16 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -670,14 +670,14 @@ static FWCfgState *bochs_bios_init(void)
 unsigned int apic_id = x86_cpu_apic_id_from_index(i);
 assert(apic_id  apic_id_limit);
 for (j = 0; j  nb_numa_nodes; j++) {
-if (test_bit(i, node_cpumask[j])) {
+if (test_bit(i, numa_info[j].node_cpu)) {
 numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
 break;
 }
 }
 }
 for (i = 0; i  nb_numa_nodes; i++) {
-numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
+numa_fw_cfg[apic_id_limit + 1 + i] = 
cpu_to_le64(numa_info[i].node_mem);
 }
 fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
  (1 + apic_id_limit + nb_numa_nodes) *
@@ -1072,8 +1072,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t 
below_4g_mem_size,
 guest_info-apic_id_limit = pc_apic_id_limit(max_cpus);
 guest_info-apic_xrupt_override = kvm_allows_irq0_override();
 guest_info-numa_nodes = nb_numa_nodes;
-guest_info-node_mem = g_memdup(node_mem, guest_info-numa_nodes *
+guest_info-node_mem = g_malloc0(guest_info-numa_nodes *
 sizeof *guest_info-node_mem);
+for (i = 0; i  nb_numa_nodes; i++) {
+guest_info-node_mem[i] = numa_info[i].node_mem;
+}
+
 guest_info-node_cpu = g_malloc0(guest_info-apic_id_limit *
  sizeof *guest_info-node_cpu);
 
@@ -1081,7 +1085,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t 
below_4g_mem_size,
 unsigned int apic_id = x86_cpu_apic_id_from_index(i);
 assert(apic_id  guest_info-apic_id_limit);
 for (j = 0; j  nb_numa_nodes; j++) {
-if (test_bit(i, node_cpumask[j])) {
+if (test_bit(i, numa_info[j].node_cpu)) {
 guest_info-node_cpu[apic_id] = j;
 break;
 }
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 2509649..d873b42 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -9,6 +9,7 @@
 #include qapi-types.h
 #include qemu/notify.h
 #include qemu/main-loop.h
+#include qemu/bitmap.h
 
 /* vl.c */
 
@@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock;
 #define MAX_NODES 64
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
-extern uint64_t node_mem[MAX_NODES];
-extern unsigned long *node_cpumask[MAX_NODES];
+typedef struct node_info {
+uint64_t node_mem;
+DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+} NodeInfo;
+extern NodeInfo numa_info[MAX_NODES];
 void numa_add(const char *optarg);
 void set_numa_nodes(void);
 void set_numa_modes(void);
diff --git a/monitor.c b/monitor.c
index 845f608..b97b7d3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
 }
 monitor_printf(mon, \n);
 monitor_printf(mon, node %d size: % PRId64  MB\n, i,
-node_mem[i]  20);
+numa_info[i].node_mem  20);
 }
 }
 
diff --git a/numa.c b/numa.c
index beda80e..1bc0fad 100644
--- a/numa.c
+++ b/numa.c
@@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus)
 goto error;
 }
 
-bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
+bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
 return;
 
 error:
@@ -101,7 +101,7 @@ void numa_add(const char *optarg)
 }
 
 if (get_param_value(option, 128, mem, optarg) == 0) {
-node_mem[nodenr] = 0;
+numa_info[nodenr].node_mem = 0;
 } else {
 int64_t sval;
 sval = strtosz(option, endptr);
@@ -109,7 +109,7 @@ void numa_add(const char *optarg)
 fprintf(stderr, qemu: invalid numa mem size: %s\n, optarg);
 exit(1);
 }
-node_mem[nodenr] = sval;
+numa_info[nodenr].node_mem = sval;
 }
 if (get_param_value(option, 128, cpus, optarg) != 0) {
 numa_node_parse_cpus(nodenr, option);
@@ -134,7 +134,7 @@ void set_numa_nodes(void)
  * and distribute the available memory equally across all nodes
  */
 for (i = 0; i  nb_numa_nodes; i++) {
-if (node_mem[i] != 0)
+if (numa_info[i].node_mem != 0)
 break;
 }

[Qemu-devel] [PATCH V17 08/11] NUMA: parse guest numa nodes memory policy

2013-12-04 Thread Wanlong Gao

The memory policy setting format is like:
policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N
And we are adding this setting as a suboption of -numa mem,,
the memory policy then can be set like following:
-numa node,nodeid=0,cpus=0 \
-numa node,nodeid=1,cpus=1 \
-numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
-numa mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 include/sysemu/sysemu.h |  3 +++
 numa.c  | 18 ++
 qapi-schema.json| 33 +++--
 vl.c|  3 +++
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 807619e..82f1447 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -139,6 +139,9 @@ extern int nb_numa_mem_nodes;
 typedef struct node_info {
 uint64_t node_mem;
 DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+DECLARE_BITMAP(host_mem, MAX_NODES);
+NumaNodePolicy policy;
+bool relative;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 void set_numa_nodes(void);
diff --git a/numa.c b/numa.c
index c676c5e..da4dbbd 100644
--- a/numa.c
+++ b/numa.c
@@ -78,6 +78,7 @@ static int numa_mem_parse(NumaMemOptions *opts)
 {
 uint16_t nodenr;
 uint64_t mem_size;
+uint16List *nodes;
 
 if (opts-has_nodeid) {
 nodenr = opts-nodeid;
@@ -96,6 +97,23 @@ static int numa_mem_parse(NumaMemOptions *opts)
 numa_info[nodenr].node_mem = mem_size;
 }
 
+if (opts-has_policy) {
+numa_info[nodenr].policy = opts-policy;
+}
+
+if (opts-has_relative) {
+numa_info[nodenr].relative = opts-relative;
+}
+
+for (nodes = opts-host_nodes; nodes; nodes = nodes-next) {
+if (nodes-value  MAX_NODES) {
+fprintf(stderr, qemu: node number % PRIu16  is bigger than 
%d\n,
+nodes-value, MAX_NODES);
+continue;
+}
+bitmap_set(numa_info[nodenr].host_mem, nodes-value, 1);
+}
+
 return 0;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 1043e57..c0dad81 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4246,6 +4246,26 @@
'*mem':'str' }}
 
 ##
+# @NumaNodePolicy
+#
+# NUMA node policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred node for allocation
+#
+# @membind: a strict policy that restricts memory allocation to the
+#   nodes specified
+#
+# @interleave: the page allocations is interleaved across the set
+#  of nodes specified
+#
+# Since 1.7
+##
+{ 'enum': 'NumaNodePolicy',
+  'data': [ 'default', 'preferred', 'membind', 'interleave' ] }
+
+##
 # @NumaMemOptions
 #
 # Set memory information of guest NUMA node. (for OptsVisitor)
@@ -4254,9 +4274,18 @@
 #
 # @size: #optional memory size of this node
 #
+# @policy: #optional memory policy of this node
+#
+# @relative: #optional if the nodes specified are relative
+#
+# @host-nodes: #optional host nodes for its memory policy
+#
 # Since 1.7
 ##
 { 'type': 'NumaMemOptions',
   'data': {
-   '*nodeid': 'uint16',
-   '*size':   'size' }}
+   '*nodeid': 'uint16',
+   '*size':   'size',
+   '*policy': 'NumaNodePolicy',
+   '*relative':   'bool',
+   '*host-nodes': ['uint16'] }}
diff --git a/vl.c b/vl.c
index 064b821..95d03f5 100644
--- a/vl.c
+++ b/vl.c
@@ -2815,6 +2815,9 @@ int main(int argc, char **argv, char **envp)
 for (i = 0; i  MAX_NODES; i++) {
 numa_info[i].node_mem = 0;
 bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+bitmap_zero(numa_info[i].host_mem, MAX_NODES);
+numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT;
+numa_info[i].relative = false;
 }
 
 nb_numa_nodes = 0;
-- 
1.8.5

[Qemu-devel] [PATCH V17 05/11] NUMA: introduce NumaMemOptions

2013-12-04 Thread Wanlong Gao

Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 qapi-schema.json | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index db539b6..1043e57 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4223,7 +4223,8 @@
 ##
 { 'union': 'NumaOptions',
   'data': {
-'node': 'NumaNodeOptions' }}
+'node': 'NumaNodeOptions',
+'mem' : 'NumaMemOptions' }}
 
 ##
 # @NumaNodeOptions
@@ -4243,3 +4244,19 @@
'*nodeid': 'uint16',
'*cpus':   ['uint16'],
'*mem':'str' }}
+
+##
+# @NumaMemOptions
+#
+# Set memory information of guest NUMA node. (for OptsVisitor)
+#
+# @nodeid: #optional NUMA node ID
+#
+# @size: #optional memory size of this node
+#
+# Since 1.7
+##
+{ 'type': 'NumaMemOptions',
+  'data': {
+   '*nodeid': 'uint16',
+   '*size':   'size' }}
-- 
1.8.5

[Qemu-devel] [PATCH V17 11/11] NUMA: convert hmp command info_numa to use qmp command query_numa

2013-12-04 Thread Wanlong Gao

Reviewed-by: Luiz Capitulino lcapitul...@redhat.com
Signed-off-by: Wanlong Gao gaowanl...@cn.fujitsu.com
---
 hmp.c | 57 +
 hmp.h |  1 +
 monitor.c | 21 +
 3 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/hmp.c b/hmp.c
index 32ee285..d6dedd2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -24,6 +24,10 @@
 #include ui/console.h
 #include block/qapi.h
 #include qemu-io.h
+#include qapi-visit.h
+#include qapi/opts-visitor.h
+#include qapi/dealloc-visitor.h
+#include sysemu/sysemu.h
 
 static void hmp_handle_error(Monitor *mon, Error **errp)
 {
@@ -1564,3 +1568,56 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 
 hmp_handle_error(mon, err);
 }
+
+void hmp_info_numa(Monitor *mon, const QDict *qdict)
+{
+NUMANodeList *node_list, *node;
+uint16List *head;
+int nodeid;
+char *policy_str = NULL;
+
+node_list = qmp_query_numa(NULL);
+
+monitor_printf(mon, %d nodes\n, nb_numa_nodes);
+for (node = node_list; node; node = node-next) {
+nodeid = node-value-nodeid;
+monitor_printf(mon, node %d cpus:, nodeid);
+head = node-value-cpus;
+for (head = node-value-cpus; head != NULL; head = head-next) {
+monitor_printf(mon,  %d, (int)head-value);
+}
+monitor_printf(mon, \n);
+monitor_printf(mon, node %d size: % PRId64  MB\n,
+   nodeid, node-value-memory  20);
+switch (node-value-policy) {
+case NUMA_NODE_POLICY_DEFAULT:
+policy_str = g_strdup(default);
+break;
+case NUMA_NODE_POLICY_PREFERRED:
+policy_str = g_strdup(preferred);
+break;
+case NUMA_NODE_POLICY_MEMBIND:
+policy_str = g_strdup(membind);
+break;
+case NUMA_NODE_POLICY_INTERLEAVE:
+policy_str = g_strdup(interleave);
+break;
+default:
+break;
+}
+monitor_printf(mon, node %d policy: %s\n,
+   nodeid, policy_str ? :  );
+if (policy_str) {
+free(policy_str);
+}
+monitor_printf(mon, node %d relative: %s\n, nodeid,
+   node-value-relative ? true : false);
+monitor_printf(mon, node %d host-nodes:, nodeid);
+for (head = node-value-host_nodes; head != NULL; head = head-next) {
+monitor_printf(mon,  %d, (int)head-value);
+}
+monitor_printf(mon, \n);
+}
+
+qapi_free_NUMANodeList(node_list);
+}
diff --git a/hmp.h b/hmp.h
index 54cf71f..4f8d39b 100644
--- a/hmp.h
+++ b/hmp.h
@@ -37,6 +37,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict);
 void hmp_info_pci(Monitor *mon, const QDict *qdict);
 void hmp_info_block_jobs(Monitor *mon, const QDict *qdict);
 void hmp_info_tpm(Monitor *mon, const QDict *qdict);
+void hmp_info_numa(Monitor *mon, const QDict *qdict);
 void hmp_quit(Monitor *mon, const QDict *qdict);
 void hmp_stop(Monitor *mon, const QDict *qdict);
 void hmp_system_reset(Monitor *mon, const QDict *qdict);
diff --git a/monitor.c b/monitor.c
index b97b7d3..f747a48 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1989,25 +1989,6 @@ static void do_info_mtree(Monitor *mon, const QDict 
*qdict)
 mtree_info((fprintf_function)monitor_printf, mon);
 }
 
-static void do_info_numa(Monitor *mon, const QDict *qdict)
-{
-int i;
-CPUState *cpu;
-
-monitor_printf(mon, %d nodes\n, nb_numa_nodes);
-for (i = 0; i  nb_numa_nodes; i++) {
-monitor_printf(mon, node %d cpus:, i);
-CPU_FOREACH(cpu) {
-if (cpu-numa_node == i) {
-monitor_printf(mon,  %d, cpu-cpu_index);
-}
-}
-monitor_printf(mon, \n);
-monitor_printf(mon, node %d size: % PRId64  MB\n, i,
-numa_info[i].node_mem  20);
-}
-}
-
 #ifdef CONFIG_PROFILER
 
 int64_t qemu_time;
@@ -2775,7 +2756,7 @@ static mon_cmd_t info_cmds[] = {
 .args_type  = ,
 .params = ,
 .help   = show NUMA information,
-.mhandler.cmd = do_info_numa,
+.mhandler.cmd = hmp_info_numa,
 },
 {
 .name   = usb,
-- 
1.8.5

[Qemu-devel] [PATCH v8 1/5] vmstate: add VMSTATE_PTIMER_ARRAY

2013-12-04 Thread liguang

Signed-off-by: liguang lig.f...@cn.fujitsu.com
---
 include/migration/vmstate.h |4 
 savevm.c|   31 +++
 2 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 9d09e60..977cf52 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -165,6 +165,7 @@ extern const VMStateInfo vmstate_info_timer;
 extern const VMStateInfo vmstate_info_buffer;
 extern const VMStateInfo vmstate_info_unused_buffer;
 extern const VMStateInfo vmstate_info_bitmap;
+extern const VMStateInfo vmstate_info_ptimer;
 
 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
@@ -613,6 +614,9 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_TIMER_ARRAY(_f, _s, _n)  \
 VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *)
 
+#define VMSTATE_PTIMER_ARRAY(_f, _s, _n)\
+VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_ptimer, ptimer_state 
*)
+
 #define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \
 VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
 
diff --git a/savevm.c b/savevm.c
index 2f631d4..54dbb33 100644
--- a/savevm.c
+++ b/savevm.c
@@ -30,6 +30,7 @@
 #include monitor/monitor.h
 #include sysemu/sysemu.h
 #include qemu/timer.h
+#include hw/ptimer.h
 #include audio/audio.h
 #include migration/migration.h
 #include qemu/sockets.h
@@ -1362,6 +1363,36 @@ const VMStateInfo vmstate_info_timer = {
 .put  = put_timer,
 };
 
+static int get_ptimer(QEMUFile *f, void *pv, size_t size)
+{
+ptimer_state *v = pv;
+uint64_t count;
+
+count = qemu_get_be64(f);
+if (count != -1) {
+ptimer_set_count(v, count);
+} else {
+ptimer_stop(v);
+}
+
+return 0;
+}
+
+static void put_ptimer(QEMUFile *f, void *pv, size_t size)
+{
+ptimer_state *v = pv;
+uint64_t count;
+
+count = ptimer_get_count(v);
+qemu_put_be64(f, count);
+}
+
+const VMStateInfo vmstate_info_ptimer = {
+.name = ptimer,
+.get  = get_ptimer,
+.put  = put_ptimer,
+};
+
 /* uint8_t buffers */
 
 static int get_buffer(QEMUFile *f, void *pv, size_t size)
-- 
1.7.2.5

[Qemu-devel] [PATCH v8 2/5] hw/timer: add allwinner a10 timer

2013-12-04 Thread liguang

Signed-off-by: liguang lig.f...@cn.fujitsu.com
---
 default-configs/arm-softmmu.mak  |2 +
 hw/timer/Makefile.objs   |2 +
 hw/timer/allwinner-a10-pit.c |  253 ++
 include/hw/timer/allwinner-a10-pit.h |   57 
 4 files changed, 314 insertions(+), 0 deletions(-)
 create mode 100644 hw/timer/allwinner-a10-pit.c
 create mode 100644 include/hw/timer/allwinner-a10-pit.h

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index a555eef..0029596 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -81,3 +81,5 @@ CONFIG_VERSATILE_I2C=y
 
 CONFIG_SDHCI=y
 CONFIG_INTEGRATOR_DEBUG=y
+
+CONFIG_ALLWINNER_A10=y
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index eca5905..3020388 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -27,3 +27,5 @@ obj-$(CONFIG_SH4) += sh_timer.o
 obj-$(CONFIG_TUSB6010) += tusb6010.o
 
 obj-$(CONFIG_MC146818RTC) += mc146818rtc.o
+
+obj-$(CONFIG_ALLWINNER_A10) += allwinner-a10-pit.o
diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c
new file mode 100644
index 000..2305813
--- /dev/null
+++ b/hw/timer/allwinner-a10-pit.c
@@ -0,0 +1,253 @@
+/*
+ * Allwinner A10 timer device emulation
+ *
+ * Copyright (C) 2013 Li Guang
+ * Written by Li Guang lig.f...@cn.fujitsu.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include hw/sysbus.h
+#include sysemu/sysemu.h
+#include hw/timer/allwinner-a10-pit.h
+
+
+static uint64_t a10_pit_read(void *opaque, hwaddr offset, unsigned size)
+{
+AwA10PITState *s = AW_A10_PIT(opaque);
+uint8_t index;
+
+switch (offset) {
+case AW_A10_PIT_TIMER_IRQ_EN:
+return s-irq_enable;
+case AW_A10_PIT_TIMER_IRQ_ST:
+return s-irq_status;
+case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE * 6 + 
AW_A10_PIT_TIMER_COUNT:
+index = offset  0xf0;
+index = 4;
+index -= 1;
+switch (offset  0x0f) {
+case AW_A10_PIT_TIMER_CONTROL:
+return s-control[index];
+case AW_A10_PIT_TIMER_INTERVAL:
+return s-interval[index];
+case AW_A10_PIT_TIMER_COUNT:
+s-count[index] = ptimer_get_count(s-timer[index]);
+return s-count[index];
+default:
+qemu_log_mask(LOG_GUEST_ERROR,
+  %s: Bad offset 0x%x\n,  __func__, (int)offset);
+break;
+}
+case AW_A10_PIT_WDOG_CONTROL:
+break;
+case AW_A10_PIT_WDOG_MODE:
+break;
+case AW_A10_PIT_COUNT_LO:
+return s-count_lo;
+case AW_A10_PIT_COUNT_HI:
+return s-count_hi;
+case AW_A10_PIT_COUNT_CTL:
+return s-count_ctl;
+default:
+qemu_log_mask(LOG_GUEST_ERROR,
+  %s: Bad offset 0x%x\n,  __func__, (int)offset);
+break;
+}
+
+return 0;
+}
+
+static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
+unsigned size)
+{
+ AwA10PITState *s = AW_A10_PIT(opaque);
+ uint8_t index;
+
+switch (offset) {
+case AW_A10_PIT_TIMER_IRQ_EN:
+s-irq_enable = value;
+break;
+case AW_A10_PIT_TIMER_IRQ_ST:
+s-irq_status = ~value;
+break;
+case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE * 6 + 
AW_A10_PIT_TIMER_COUNT:
+index = offset  0xf0;
+index = 4;
+index -= 1;
+switch (offset  0x0f) {
+case AW_A10_PIT_TIMER_CONTROL:
+s-control[index] = value;
+if (s-control[index]  AW_A10_PIT_TIMER_RELOAD) {
+ptimer_set_count(s-timer[index], s-interval[index]);
+}
+if (s-control[index]  AW_A10_PIT_TIMER_EN) {
+int oneshot = 0;
+if (s-control[index]  AW_A10_PIT_TIMER_MODE) {
+oneshot = 1;
+}
+ptimer_run(s-timer[index], oneshot);
+} else {
+ptimer_stop(s-timer[index]);
+}
+break;
+case AW_A10_PIT_TIMER_INTERVAL:
+s-interval[index] = value;
+ptimer_set_limit(s-timer[index], s-interval[index], 1);
+break;
+case AW_A10_PIT_TIMER_COUNT:
+s-count[index] = value;
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR,
+  %s: Bad offset 0x%x\n,  __func__, (int)offset);
+}
+break;
+case

[Qemu-devel] [PATCH v8 4/5] hw/arm: add allwinner a10 SoC support

2013-12-04 Thread liguang

Signed-off-by: liguang lig.f...@cn.fujitsu.com
---
 hw/arm/Makefile.objs   |2 +-
 hw/arm/allwinner-a10.c |   77 
 include/hw/arm/allwinner-a10.h |   36 ++
 3 files changed, 114 insertions(+), 1 deletions(-)
 create mode 100644 hw/arm/allwinner-a10.c
 create mode 100644 include/hw/arm/allwinner-a10.h

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 3671b42..b9e5983 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
 obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o
 
 obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
-obj-y += omap1.o omap2.o strongarm.o
+obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
new file mode 100644
index 000..c4699b7
--- /dev/null
+++ b/hw/arm/allwinner-a10.c
@@ -0,0 +1,77 @@
+#include hw/sysbus.h
+#include hw/devices.h
+#include hw/arm/allwinner-a10.h
+
+
+static void aw_a10_init(Object *obj)
+{
+AwA10State *s = AW_A10(obj);
+DeviceState *dev;
+
+object_initialize(s-cpu, sizeof(s-cpu), cortex-a8- TYPE_ARM_CPU);
+object_property_add_child(obj, cpu, OBJECT(s-cpu), NULL);
+
+object_initialize(s-intc, sizeof(s-timer), TYPE_AW_A10_PIC);
+dev = DEVICE(s-intc);
+qdev_set_parent_bus(dev, sysbus_get_default());
+
+object_initialize(s-timer, sizeof(s-timer), TYPE_AW_A10_PIT);
+dev = DEVICE(s-timer);
+qdev_set_parent_bus(dev, sysbus_get_default());
+}
+
+static void aw_a10_realize(DeviceState *dev, Error **errp)
+{
+AwA10State *s = AW_A10(dev);
+SysBusDevice *sysbusdev;
+uint8_t i;
+Error *err = NULL;
+
+object_property_set_bool(OBJECT(s-cpu), true, realized, err);
+s-cpu_irq[0] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_IRQ);
+s-cpu_irq[1] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_FIQ);
+
+object_property_set_bool(OBJECT(s-intc), true, realized, err);
+sysbusdev = SYS_BUS_DEVICE(s-intc);
+sysbus_mmio_map(sysbusdev, 0, AW_A10_PIC_REG_BASE);
+sysbus_connect_irq(sysbusdev, 0, s-cpu_irq[0]);
+sysbus_connect_irq(sysbusdev, 1, s-cpu_irq[1]);
+for (i = 0; i  AW_A10_PIC_INT_NR; i++) {
+s-irq[i] = qdev_get_gpio_in(DEVICE(s-intc), i);
+}
+
+object_property_set_bool(OBJECT(s-timer), true, realized, err);
+sysbusdev = SYS_BUS_DEVICE(s-timer);
+sysbus_mmio_map(sysbusdev, 0, AW_A10_PIT_REG_BASE);
+sysbus_connect_irq(sysbusdev, 0, s-irq[22]);
+sysbus_connect_irq(sysbusdev, 1, s-irq[23]);
+sysbus_connect_irq(sysbusdev, 2, s-irq[24]);
+sysbus_connect_irq(sysbusdev, 3, s-irq[25]);
+sysbus_connect_irq(sysbusdev, 4, s-irq[67]);
+sysbus_connect_irq(sysbusdev, 5, s-irq[68]);
+
+serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, s-irq[1],
+   115200, serial_hds[0], DEVICE_NATIVE_ENDIAN);
+}
+
+static void aw_a10_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+dc-realize = aw_a10_realize;
+}
+
+static const TypeInfo aw_a10_type_info = {
+.name = TYPE_AW_A10,
+.parent = TYPE_DEVICE,
+.instance_size = sizeof(AwA10State),
+.instance_init = aw_a10_init,
+.class_init = aw_a10_class_init,
+};
+
+static void aw_a10_register_types(void)
+{
+type_register_static(aw_a10_type_info);
+}
+
+type_init(aw_a10_register_types)
diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h
new file mode 100644
index 000..a3e7b77
--- /dev/null
+++ b/include/hw/arm/allwinner-a10.h
@@ -0,0 +1,36 @@
+#ifndef ALLWINNER_H_
+
+#include qemu-common.h
+#include qemu/error-report.h
+#include hw/char/serial.h
+#include hw/arm/arm.h
+#include hw/timer/allwinner-a10-pit.h
+#include hw/intc/allwinner-a10-pic.h
+
+#include sysemu/sysemu.h
+#include exec/address-spaces.h
+
+
+#define AW_A10_PIC_REG_BASE 0x01c20400
+#define AW_A10_PIT_REG_BASE 0x01c20c00
+#define AW_A10_UART0_REG_BASE   0x01c28000
+
+#define AW_A10_SDRAM_BASE   0x4000
+
+#define TYPE_AW_A10 allwiner-a10
+#define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10)
+
+typedef struct AwA10State {
+/* private */
+DeviceState parent_obj;
+/* public */
+
+ARMCPU cpu;
+qemu_irq irq[AW_A10_PIC_INT_NR];
+qemu_irq cpu_irq[2];
+AwA10PITState timer;
+AwA10PICState intc;
+} AwA10State;
+
+#define ALLWINNER_H_
+#endif
-- 
1.7.2.5

[Qemu-devel] [PATCH v8 5/5] hw/arm: add cubieboard support

2013-12-04 Thread liguang

Signed-off-by: liguang lig.f...@cn.fujitsu.com
---
 hw/arm/Makefile.objs |2 +-
 hw/arm/cubieboard.c  |   52 ++
 2 files changed, 53 insertions(+), 1 deletions(-)
 create mode 100644 hw/arm/cubieboard.c

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index b9e5983..8be8d8e 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
 obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o
 
 obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
-obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o
+obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o cubieboard.o
diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c
new file mode 100644
index 000..0599061
--- /dev/null
+++ b/hw/arm/cubieboard.c
@@ -0,0 +1,52 @@
+#include hw/sysbus.h
+#include hw/devices.h
+#include hw/boards.h
+#include hw/arm/allwinner-a10.h
+
+
+static struct arm_boot_info cubieboard_binfo = {
+.loader_start = AW_A10_SDRAM_BASE,
+.board_id = 0x1008,
+};
+
+typedef struct CubieBoardState {
+AwA10State *a10;
+MemoryRegion sdram;
+} CubieBoardState;
+
+static void cubieboard_init(QEMUMachineInitArgs *args)
+{
+CubieBoardState *s = g_new(CubieBoardState, 1);
+Error *err = NULL;
+
+s-a10 = AW_A10(object_new(TYPE_AW_A10));
+object_property_set_bool(OBJECT(s-a10), true, realized, err);
+if (err != NULL) {
+error_report(Couldn't realize Allwinner A10: %s\n,
+error_get_pretty(err));
+exit(1);
+}
+
+memory_region_init_ram(s-sdram, NULL, cubieboard.ram, args-ram_size);
+vmstate_register_ram_global(s-sdram);
+memory_region_add_subregion(get_system_memory(), AW_A10_SDRAM_BASE,
+s-sdram);
+cubieboard_binfo.ram_size = args-ram_size;
+cubieboard_binfo.kernel_filename = args-kernel_filename;
+cubieboard_binfo.kernel_cmdline = args-kernel_cmdline;
+arm_load_kernel(s-a10-cpu, cubieboard_binfo);
+}
+
+static QEMUMachine cubieboard_machine = {
+.name = cubieboard,
+.desc = cubietec cubieboard emulation,
+.init = cubieboard_init,
+};
+
+
+static void cubieboard_machine_init(void)
+{
+qemu_register_machine(cubieboard_machine);
+}
+
+machine_init(cubieboard_machine_init)
-- 
1.7.2.5

[Qemu-devel] [PATCH v8 3/5] hw/intc: add allwinner A10 interrupt controller

2013-12-04 Thread liguang

Signed-off-by: liguang lig.f...@cn.fujitsu.com
---
 hw/intc/Makefile.objs   |1 +
 hw/intc/allwinner-a10-pic.c |  218 +++
 include/hw/intc/allwinner-a10-pic.h |   40 +++
 3 files changed, 259 insertions(+), 0 deletions(-)
 create mode 100644 hw/intc/allwinner-a10-pic.c
 create mode 100644 include/hw/intc/allwinner-a10-pic.h

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 47ac442..ec977c4 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -24,3 +24,4 @@ obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
 obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_ALLWINNER_A10) += allwinner-a10-pic.o
diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c
new file mode 100644
index 000..fdc9e4e
--- /dev/null
+++ b/hw/intc/allwinner-a10-pic.c
@@ -0,0 +1,218 @@
+/*
+ * Allwinner A10 interrupt controller device emulation
+ *
+ * Copyright (C) 2013 Li Guang
+ * Written by Li Guang lig.f...@cn.fujitsu.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include hw/sysbus.h
+#include hw/devices.h
+#include sysemu/sysemu.h
+#include hw/intc/allwinner-a10-pic.h
+
+
+static void aw_a10_pic_update(AwA10PICState *s)
+{
+uint8_t i, j;
+bool irq = false, fiq = false;
+
+for (i = 0, j = 0; i  AW_A10_PIC_REG_NUM; i++) {
+if (s-irq_pending[i] == 0  s-fiq_pending[i] == 0) {
+continue;
+}
+for (j = 0; j  32; j++) {
+if (test_bit(j, (void *)s-mask[i])) {
+continue;
+}
+if (test_bit(j, (void *)s-irq_pending[i])) {
+irq = true;
+}
+if (test_bit(j, (void *)s-fiq_pending[i]) 
+test_bit(j, (void *)s-select[i])) {
+fiq = true;
+}
+if (irq  fiq) {
+goto out;
+}
+}
+}
+
+out:
+qemu_set_irq(s-parent_irq, irq);
+qemu_set_irq(s-parent_fiq, fiq);
+}
+
+static void aw_a10_pic_set_irq(void *opaque, int irq, int level)
+{
+AwA10PICState *s = opaque;
+
+if (level) {
+set_bit(irq%32, (void *)s-irq_pending[irq/32]);
+}
+aw_a10_pic_update(s);
+}
+
+static uint64_t aw_a10_pic_read(void *opaque, hwaddr offset, unsigned size)
+{
+AwA10PICState *s = opaque;
+uint8_t index = (offset  0xc)/4;
+
+switch (offset) {
+case AW_A10_PIC_VECTOR:
+return s-vector;
+case AW_A10_PIC_BASE_ADDR:
+return s-base_addr;
+case AW_A10_PIC_PROTECT:
+return s-protect;
+case AW_A10_PIC_NMI:
+return s-nmi;
+case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8:
+return s-irq_pending[index];
+case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8:
+return s-fiq_pending[index];
+case AW_A10_PIC_SELECT ... AW_A10_PIC_SELECT + 8:
+return s-select[index];
+case AW_A10_PIC_ENABLE ... AW_A10_PIC_ENABLE + 8:
+return s-enable[index];
+case AW_A10_PIC_MASK ... AW_A10_PIC_MASK + 8:
+return s-mask[index];
+default:
+qemu_log_mask(LOG_GUEST_ERROR,
+  %s: Bad offset 0x%x\n,  __func__, (int)offset);
+break;
+}
+
+return 0;
+}
+
+static void aw_a10_pic_write(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+AwA10PICState *s = opaque;
+uint8_t index = (offset  0xc)/4;
+
+switch (offset) {
+case AW_A10_PIC_VECTOR:
+s-vector = value  ~0x3;
+break;
+case AW_A10_PIC_BASE_ADDR:
+s-base_addr = value  ~0x3;
+case AW_A10_PIC_PROTECT:
+s-protect = value;
+break;
+case AW_A10_PIC_NMI:
+s-nmi = value;
+break;
+case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8:
+s-irq_pending[index] = ~value;
+break;
+case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8:
+s-fiq_pending[index] = ~value;
+break;
+case AW_A10_PIC_SELECT ... AW_A10_PIC_SELECT + 8:
+s-select[index] = value;
+break;
+case AW_A10_PIC_ENABLE ... AW_A10_PIC_ENABLE + 8:
+s-enable[index] = value;
+break;
+case AW_A10_PIC_MASK ... AW_A10_PIC_MASK + 8:
+s-mask[index] = value;
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR,
+  %s: Bad offset 0x%x\n,  __func__, (int)offset);
+break;
+}
+
+

[Qemu-devel] [PATCH v8 0/5] add allwinner A10 SoC support

2013-12-04 Thread liguang


lay a foundation for allwinner A10 SoC with a cortex-a8
processor, and will add more devices later.

v2: split timer and interrupt controller emulation into
their corresponding files.

v3: 
1. change loader_start address
2. add 64-bit counter
3. fixup fail to clear interrup status issue

v4:
1. add VMSD
2. use defines of magic number for readability
3. code cleanup

v5:
1. add VMSTATE_PTIMER_ARRAY
2. code cleanup

v6:
1. fix a fiq lost issue pointed out by Peter Crosthwaite
2. code cleanup

v7:
model allwinner A10 as a SoC device,
and add cubieboard.

v8:
1. A10 be QOMified as a device
2. add AW as prefix of A10


TODO:
1. add BROM support
2. add more devices

test:
can boot-up officially released linux kernel build with
PLL disabled.

reference:
http://linux-sunxi.org/Main_Page


Li Guang (5)
 vmstate: add VMSTATE_PTIMER_ARRAY
 hw/timer: add allwinner a10 timer
 hw/intc: add allwinner A10 interrupt controller
 hw/arm: add allwinner a10 SoC support
 hw/arm: add cubieboard support

default-configs/arm-softmmu.mak  |   2 +
hw/arm/Makefile.objs |   4 +-
hw/arm/allwinner-a10.c   |  39 
+++
hw/arm/cubieboard.c  |  33 +
hw/intc/Makefile.objs|   1 +
hw/intc/allwinner-a10_pic.c  | 218 +++
hw/timer/Makefile.objs   |   2 +
hw/timer/allwinner-a10_pit.c | 253 ++
include/hw/arm/allwinner-a10.h   |  27 +++
include/hw/intc/allwinner-a10_pic.h  |  40 +++
include/hw/timer/allwinner-a10_pit.h |  57 
include/migration/vmstate.h  |   4 
savevm.c |  31 +++
13 files changed, 709 insertions(+), 2 deletions(-)
 create mode 100644 hw/timer/allwinner-a10_pit.c
 create mode 100644 include/hw/timer/allwinner-a10_pit.h
 create mode 100644 hw/intc/allwinner-a10_pic.c
 create mode 100644 include/hw/intc/allwinner-a10_pic.h
 create mode 100644 hw/arm/allwinner-a10.c
 create mode 100644 include/hw/arm/allwinner-a10.h
 create mode 100644 hw/arm/cubieboard.c

Re: [Qemu-devel] gpu and console chicken and egg

2013-12-04 Thread Gerd Hoffmann

On Mi, 2013-12-04 at 17:02 +1000, Dave Airlie wrote:
 So I've hit a bit of a init ordering issue that I'm not sure how best to 
 solve,
 
 Just some background:
 In order for the virt GPU and the UI layer (SDL or GTK etc) to
 interact properly over OpenGL use, I have created and OpenGL provider
 in the console, and the UI layer can register callbacks for a single
 GL provider (only one makes sense really) when it starts up. This is
 mainly to be used for context management and swap buffers management.
 
 Now in the virtio GPU I'd was going to use a virtio feature to say
 whether the qemu hw can support the 3D renderer, dependant on whether
 it was linked with the virgl renderer and whether the current UI was
 GL capable.

Hmm, why does it depend on the UI?  Wasn't the plan to render into a
dma-buf no matter what?  Then either read the rendered result from the
dmabuf (non-gl UI like vnc) or let the (gl-capable) UI pass the dma-buf
to the compositor?

Also note that the virtio-gpu gl-capability needs to be configurable for
live migration reasons, so you can migrate between hosts with different
3d capabilities.  Something like -device
virtio-gpu,gl={none,gl2,gl3,host} where none turns it off, gl
$version specifies the gl support level and host makes it depend on
the host capabilities (simliar to -cpu host).  For starters you can
leave out host and depend on the user set this.

 So is there a method to modify the advertised feature
 bits later in the setup sequence before the guest is started?

You can register a callback to be notified when the guest is
started/stopped (qemu_add_vm_change_state_handler).  That could be used
although it is a bit hackish.

cheers,
  Gerd

PS: Now that 1.7 is out of the door and 2.0 tree is open for development
we should start getting the bits which are ready merged to make your
patch backlog smaller.  SDL2 would be a good start I think.

Re: [Qemu-devel] [PATCH v2 0/2]

2013-12-04 Thread Markus Armbruster

Your cover letter lacks a proper subject, and your patches lack proper
threading.

git-send-email threads properly (unless you mess up its configuration).

Re: [Qemu-devel] [PATCH 07/23] hw/pci/pcie_aer.c: fix buffer overruns on invalid state load

2013-12-04 Thread Michael S. Tsirkin

On Tue, Dec 03, 2013 at 08:59:52PM +, Peter Maydell wrote:
 On 3 December 2013 20:41, Michael S. Tsirkin m...@redhat.com wrote:
  On Tue, Dec 03, 2013 at 06:30:46PM +, Peter Maydell wrote:
  On 3 December 2013 16:28, Michael S. Tsirkin m...@redhat.com wrote:
const VMStateDescription vmstate_pcie_aer_log = {
.name = PCIE_AER_ERROR_LOG,
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
   +.post_load = pcie_aer_state_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT16(log_num, PCIEAERLog),
   -VMSTATE_UINT16(log_max, PCIEAERLog),
   -VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
   +VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
   +VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_max,
  vmstate_pcie_aer_err, PCIEAERErr),
VMSTATE_END_OF_LIST()
}
 
  Isn't this a migration compability break?
 
  How is it a break?
 
 If a QEMU with this patch sends data to a QEMU without it, then the
 receiving end will think it should expect log_num array entries but the
 sending end is going to send log_max of them. Conversely, an old-new
 migration is going to send fewer array entries than the destination
 expects. Or have I misinterpreted how the VARRAY entries work?
 
 thanks
 -- PMM

Ah, got it. You are right, good catch. I think we need VMSTATE_UINT16_LE, this 
will
make sure log_num = log_max without changing
VMSTATE_STRUCT_VARRAY size.

Re: [Qemu-devel] [PATCH v2 2/2] target-i386: Intel MPX

2013-12-04 Thread Paolo Bonzini

Il 04/12/2013 08:56, Liu, Jinsong ha scritto:
 From 256484fd75d4eb4d248e5e0f493f16182da59dc2 Mon Sep 17 00:00:00 2001
 From: Liu Jinsong jinsong@intel.com
 Date: Wed, 4 Dec 2013 16:56:49 +0800
 Subject: [PATCH v2 2/2] target-i386: Intel MPX
 
 Add some MPX related definiation, and hardcode sizes and offsets
 of xsave features 3 and 4. It also add corresponding part to
 kvm_get/put_xsave.
 
 Signed-off-by: Liu Jinsong jinsong@intel.com
 ---
  target-i386/cpu.c |4 
  target-i386/cpu.h |   24 +---
  target-i386/kvm.c |   10 ++
  3 files changed, 35 insertions(+), 3 deletions(-)
 
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index 544b57f..52ca029 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -336,6 +336,10 @@ typedef struct ExtSaveArea {
  static const ExtSaveArea ext_save_areas[] = {
  [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX,
  .offset = 0x240, .size = 0x100 },
 +[3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
 +.offset = 0x3c0, .size = 0x40  },
 +[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
 +.offset = 0x400, .size = 0x10  },
  };
  
  const char *get_register_name_32(unsigned int reg)
 diff --git a/target-i386/cpu.h b/target-i386/cpu.h
 index ea373e8..4020591 100644
 --- a/target-i386/cpu.h
 +++ b/target-i386/cpu.h
 @@ -380,9 +380,12 @@
  
  #define MSR_VM_HSAVE_PA 0xc0010117
  
 -#define XSTATE_FP   1
 -#define XSTATE_SSE  2
 -#define XSTATE_YMM  4
 +#define XSTATE_FP   (1ULL  0)
 +#define XSTATE_SSE  (1ULL  1)
 +#define XSTATE_YMM  (1ULL  2)
 +#define XSTATE_BNDREGS  (1ULL  3)
 +#define XSTATE_BNDCSR   (1ULL  4)
 +
  
  /* CPUID feature words */
  typedef enum FeatureWord {
 @@ -545,6 +548,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
  #define CPUID_7_0_EBX_ERMS (1  9)
  #define CPUID_7_0_EBX_INVPCID  (1  10)
  #define CPUID_7_0_EBX_RTM  (1  11)
 +#define CPUID_7_0_EBX_MPX  (1  14)
  #define CPUID_7_0_EBX_RDSEED   (1  18)
  #define CPUID_7_0_EBX_ADX  (1  19)
  #define CPUID_7_0_EBX_SMAP (1  20)
 @@ -695,6 +699,18 @@ typedef union {
  uint64_t q;
  } MMXReg;
  
 +typedef struct BNDReg {
 +uint64_t lb;
 +uint64_t ub;
 +} BNDReg;
 +
 +typedef struct BNDCSReg {
 +uint64_t cfg;
 +uint64_t pad;
 +uint64_t sts_lo;
 +uint64_t sts_hi;
 +} BNDCSReg;
 +
  #ifdef HOST_WORDS_BIGENDIAN
  #define XMM_B(n) _b[15 - (n)]
  #define XMM_W(n) _w[7 - (n)]
 @@ -912,6 +928,8 @@ typedef struct CPUX86State {
  
  uint64_t xstate_bv;
  XMMReg ymmh_regs[CPU_NB_REGS];
 +BNDReg bnd_regs[4];
 +BNDCSReg bndcs_regs;
  
  uint64_t xcr0;
  
 diff --git a/target-i386/kvm.c b/target-i386/kvm.c
 index 749aa09..347d3d3 100644
 --- a/target-i386/kvm.c
 +++ b/target-i386/kvm.c
 @@ -980,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu)
  #define XSAVE_XMM_SPACE   40
  #define XSAVE_XSTATE_BV   128
  #define XSAVE_YMMH_SPACE  144
 +#define XSAVE_BNDREGS 240
 +#define XSAVE_BNDCSR  256
  
  static int kvm_put_xsave(X86CPU *cpu)
  {
 @@ -1012,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu)
  *(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv;
  memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs,
  sizeof env-ymmh_regs);
 +memcpy(xsave-region[XSAVE_BNDREGS], env-bnd_regs,
 +sizeof env-bnd_regs);
 +memcpy(xsave-region[XSAVE_BNDCSR], env-bndcs_regs,
 +sizeof(env-bndcs_regs));
  r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
  return r;
  }
 @@ -1294,6 +1300,10 @@ static int kvm_get_xsave(X86CPU *cpu)
  env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV];
  memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE],
  sizeof env-ymmh_regs);
 +memcpy(env-bnd_regs, xsave-region[XSAVE_BNDREGS],
 +sizeof env-bnd_regs);
 +memcpy(env-bndcs_regs, xsave-region[XSAVE_BNDCSR],
 +sizeof(env-bndcs_regs));
  return 0;
  }

Almost there.  Migration (vmstate) is still missing.

Paolo

Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP

2013-12-04 Thread Paolo Bonzini

Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 Normally the user is expected to eject DVD if it is not locked by
 the guest. eject_device() makes few checks and calls bdrv_close()
 if DVD is not in use.
 
 However it is still possible to eject DVD even if it is in use.
 For that, QEMU sets eject requested flag, the guest reads it, issues
 ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case,
 bdrv_close() is not called anywhere so it remains inserted in QEMU's
 terms.

This is expected behavior, and matches what IDE does.

Markus, can you confirm?

Paolo

Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL

2013-12-04 Thread Paolo Bonzini

Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 This prevents the guest from preventing DVD medium removal when
 there is no medium.
 
 Without this, if the user has ejected a DVD, it is possible to
 have a block device with an open tray, no media but locked.

I need to check this against a real disk.  Thanks!

Paolo

[Qemu-devel] [PATCH V7 5/6] qemu-iotests: add test for snapshot in qemu-img convert

2013-12-04 Thread Wenchao Xia

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
 tests/qemu-iotests/058 |   19 ++-
 tests/qemu-iotests/058.out |   12 
 2 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
index cf50857..14584cd 100755
--- a/tests/qemu-iotests/058
+++ b/tests/qemu-iotests/058
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Test export internal snapshot by qemu-nbd.
+# Test export internal snapshot by qemu-nbd, convert it by qemu-img.
 #
 # Copyright (C) 2013 IBM, Inc.
 #
@@ -54,6 +54,8 @@ _wait_for_nbd()
 exit 1
 }
 
+converted_image=$TEST_IMG.converted
+
 _export_nbd_snapshot()
 {
 _cleanup_nbd
@@ -74,6 +76,7 @@ _cleanup()
 {
 _cleanup_nbd
 _cleanup_test_img
+rm -f $converted_image
 }
 trap _cleanup; exit \$status 0 1 2 3 15
 
@@ -115,6 +118,20 @@ echo == verifying the exported snapshot with patterns, 
method 2 ==
 $QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $nbd_snapshot_img | _filter_qemu_io
 $QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $nbd_snapshot_img | _filter_qemu_io
 
+$QEMU_IMG convert $TEST_IMG -l sn1 -O qcow2 $converted_image
+
+echo
+echo == verifying the converted snapshot with patterns, method 1 ==
+$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io
+$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io
+
+$QEMU_IMG convert $TEST_IMG -l snapshot.name=sn1 -O qcow2 $converted_image
+
+echo
+echo == verifying the converted snapshot with patterns, method 2 ==
+$QEMU_IO -c 'read -P 0xa 0x1000 0x1000' $converted_image | _filter_qemu_io
+$QEMU_IO -c 'read -P 0xb 0x2000 0x1000' $converted_image | _filter_qemu_io
+
 # success, all done
 echo *** done
 rm -f $seq.full
diff --git a/tests/qemu-iotests/058.out b/tests/qemu-iotests/058.out
index 768ac61..9a69379 100644
--- a/tests/qemu-iotests/058.out
+++ b/tests/qemu-iotests/058.out
@@ -29,4 +29,16 @@ read 4096/4096 bytes at offset 4096
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 4096/4096 bytes at offset 8192
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verifying the converted snapshot with patterns, method 1 ==
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 8192
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verifying the converted snapshot with patterns, method 2 ==
+read 4096/4096 bytes at offset 4096
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 8192
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
-- 
1.7.1

[Qemu-devel] [PATCH V7 1/6] snapshot: distinguish id and name in load_tmp

2013-12-04 Thread Wenchao Xia

Since later this function will be used so improve it. The only caller of it
now is qemu-img, and it is not impacted by introduce function
bdrv_snapshot_load_tmp_by_id_or_name() that call bdrv_snapshot_load_tmp()
twice to keep old search logic. bdrv_snapshot_load_tmp_by_id_or_name() return
int to let caller know the errno, and errno will be used later.
Also fix a typo in comments of bdrv_snapshot_delete().

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
 block/qcow2-snapshot.c|   10 ++-
 block/qcow2.h |5 +++-
 block/snapshot.c  |   59 ++--
 include/block/block_int.h |4 ++-
 include/block/snapshot.h  |7 -
 qemu-img.c|8 -
 6 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 3529c68..ad8bf3d 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -675,7 +675,10 @@ int qcow2_snapshot_list(BlockDriverState *bs, 
QEMUSnapshotInfo **psn_tab)
 return s-nb_snapshots;
 }
 
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
+int qcow2_snapshot_load_tmp(BlockDriverState *bs,
+const char *snapshot_id,
+const char *name,
+Error **errp)
 {
 int i, snapshot_index;
 BDRVQcowState *s = bs-opaque;
@@ -687,8 +690,10 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const 
char *snapshot_name)
 assert(bs-read_only);
 
 /* Search the snapshot */
-snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
+snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
 if (snapshot_index  0) {
+error_setg(errp,
+   Can't find snapshot);
 return -ENOENT;
 }
 sn = s-snapshots[snapshot_index];
@@ -699,6 +704,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const 
char *snapshot_name)
 
 ret = bdrv_pread(bs-file, sn-l1_table_offset, new_l1_table, 
new_l1_bytes);
 if (ret  0) {
+error_setg(errp, Failed to read l1 table for snapshot);
 g_free(new_l1_table);
 return ret;
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index 922e190..303eb26 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -488,7 +488,10 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
   const char *name,
   Error **errp);
 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
+int qcow2_snapshot_load_tmp(BlockDriverState *bs,
+const char *snapshot_id,
+const char *name,
+Error **errp);
 
 void qcow2_free_snapshots(BlockDriverState *bs);
 int qcow2_read_snapshots(BlockDriverState *bs);
diff --git a/block/snapshot.c b/block/snapshot.c
index a05c0c0..565222e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -194,7 +194,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
  * If only @snapshot_id is specified, delete the first one with id
  * @snapshot_id.
  * If only @name is specified, delete the first one with name @name.
- * if none is specified, return -ENINVAL.
+ * if none is specified, return -EINVAL.
  *
  * Returns: 0 on success, -errno on failure. If @bs is not inserted, return
  * -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs
@@ -265,18 +265,71 @@ int bdrv_snapshot_list(BlockDriverState *bs,
 return -ENOTSUP;
 }
 
+/**
+ * Temporarily load an internal snapshot by @snapshot_id and @name.
+ * @bs: block device used in the operation
+ * @snapshot_id: unique snapshot ID, or NULL
+ * @name: snapshot name, or NULL
+ * @errp: location to store error
+ *
+ * If both @snapshot_id and @name are specified, load the first one with
+ * id @snapshot_id and name @name.
+ * If only @snapshot_id is specified, load the first one with id
+ * @snapshot_id.
+ * If only @name is specified, load the first one with name @name.
+ * if none is specified, return -EINVAL.
+ *
+ * Returns: 0 on success, -errno on fail. If @bs is not inserted, return
+ * -ENOMEDIUM. If @bs is not readonly, return -EINVAL. If @bs did not support
+ * internal snapshot, return -ENOTSUP. If qemu can't find a matching @id and
+ * @name, return -ENOENT. If @errp != NULL, it will always be filled on
+ * failure.
+ */
 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
-const char *snapshot_name)
+   const char *snapshot_id,
+   const char *name,
+   Error **errp)
 {
 BlockDriver *drv = bs-drv;
+
 if (!drv) {
+error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
 return -ENOMEDIUM;
 }
+if (!snapshot_id  !name) {
+error_setg(errp, snapshot_id and name are both NULL);
+return

[Qemu-devel] [PATCH V7 4/6] qemu-img: add -l for snapshot in convert

2013-12-04 Thread Wenchao Xia

Now qemu-img convert have similar options as qemu-nbd for internal
snapshot.

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
 qemu-img-cmds.hx |4 ++--
 qemu-img.c   |   44 +++-
 qemu-img.texi|   12 
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index da1d965..d029609 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -34,9 +34,9 @@ STEXI
 ETEXI
 
 DEF(convert, img_convert,
-convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o 
options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] 
output_filename)
+convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o 
options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename 
[filename2 [...]] output_filename)
 STEXI
-@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O 
@var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S 
@var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O 
@var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l 
@var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} 
[...]] @var{output_filename}
 ETEXI
 
 DEF(info, img_info,
diff --git a/qemu-img.c b/qemu-img.c
index a73beb5..ee940c2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -93,6 +93,11 @@ static void help(void)
  'options' is a comma separated list of format specific options 
in a\n
name=value format. Use -o ? for an overview of the options 
supported by the\n
used format\n
+ 'snapshot_param' is param used for internal snapshot, format\n
+   is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n
+   '[ID_OR_NAME]'\n
+ 'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n
+   instead\n
  '-c' indicates that target image must be compressed (qcow format 
only)\n
  '-u' enables unsafe rebasing. It is assumed that old and new 
backing file\n
   match exactly. The image doesn't need a working backing 
file before\n
@@ -1140,6 +1145,7 @@ static int img_convert(int argc, char **argv)
 int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
 bool quiet = false;
 Error *local_err = NULL;
+QemuOpts *sn_opts = NULL;
 
 fmt = NULL;
 out_fmt = raw;
@@ -1148,7 +1154,7 @@ static int img_convert(int argc, char **argv)
 compress = 0;
 skip_create = 0;
 for(;;) {
-c = getopt(argc, argv, f:O:B:s:hce6o:pS:t:qn);
+c = getopt(argc, argv, f:O:B:s:hce6o:pS:t:qnl:);
 if (c == -1) {
 break;
 }
@@ -1183,6 +1189,18 @@ static int img_convert(int argc, char **argv)
 case 's':
 snapshot_name = optarg;
 break;
+case 'l':
+if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+sn_opts = qemu_opts_parse(internal_snapshot_opts, optarg, 0);
+if (!sn_opts) {
+error_report(Failed in parsing snapshot param '%s',
+ optarg);
+return 1;
+}
+} else {
+snapshot_name = optarg;
+}
+break;
 case 'S':
 {
 int64_t sval;
@@ -1254,7 +1272,12 @@ static int img_convert(int argc, char **argv)
 total_sectors += bs_sectors;
 }
 
-if (snapshot_name != NULL) {
+if (sn_opts) {
+ret = bdrv_snapshot_load_tmp(bs[0],
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ local_err);
+} else if (snapshot_name != NULL) {
 if (bs_n  1) {
 error_report(No support for concatenating multiple snapshot);
 ret = -1;
@@ -1262,13 +1285,13 @@ static int img_convert(int argc, char **argv)
 }
 
 bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, local_err);
-if (error_is_set(local_err)) {
-error_report(Failed to load snapshot: %s,
- error_get_pretty(local_err));
-error_free(local_err);
-ret = -1;
-goto out;
-}
+}
+if (error_is_set(local_err)) {
+error_report(Failed to load snapshot: %s,
+ error_get_pretty(local_err));
+error_free(local_err);
+ret = -1;
+goto out;
 }
 
 /* Find driver and parse its options */
@@ -1559,6 +1582,9 @@ out:
 free_option_parameters(create_options);
 free_option_parameters(param);
 qemu_vfree(buf);
+if (sn_opts) {
+qemu_opts_del(sn_opts);
+}
 if (out_bs) {
 bdrv_unref(out_bs);

Re: [Qemu-devel] [RFC PATCH v1 0/5] Add error_abort and associated cleanups

2013-12-04 Thread Markus Armbruster

Eric Blake ebl...@redhat.com writes:

 On 12/03/2013 01:33 PM, Igor Mammedov wrote:

 Also, is it worth adding asserts and/or compiler annotations to require
 that the Error **err argument of functions be non-NULL, to ensure that
 callers are always passing either a valid destination or one of the
 special addresses?  But doing so would probably require adding a special
 address for error_ignore for callers that intend to discard an error in
 cases where the return type of the function lets them know to proceed
 with a fallback implementation (that is, cases where ignoring an error
 makes sense).

 Right now, we use NULL as ignore errors argument.

 NULL gives us a chance to express caller must not ignore errors via
 some non-null annotation that gets fed to a static analyzer.

 I doubt that would be possible with a special error_ignore object.

 Anyway, this series is about abort on error.  Let's keep ignore
 errors issues separate.
 I'm sorry for hijacking thread, but that actually an issue that started an
 original discussion.
 Where void returning QOM API functions are used with NULL, without any chance
 to detect that error happened. So abusing NULL errp in this functions
 might lead to hard to find runtime errors.
 I think Eric's suggestion was to enforce passing non NULL errp and let caller
 to deal with error gracefully so that above mentioned misuse was impossible.
 Why is ignoring errors from void foo(...) like API considered acceptable?

 Okay, so it sounds like consensus is that using NULL as the means to
 ignore errors is okay when there is an alternative way to detect error,
 but that for any function that returns void, adding an assert(errp)
 would be appropriate because the caller cannot safely ignore the
 failure.  It's not worth inventing an error_ignore special address, but
 for functions that have no way to report errors except via errp, then it
 IS worth enforcing that the caller is either prepared to handle the
 error or has passed error_abort (or any other special addresses we add
 later).

No objection to asserting that the caller passed an error object when
the error object is the only way to signal failure.  You can't force
your callers to check for failure, but the assertion could help prevent
accidental misuse.

Assertions fire at run-time, though.

Asserting argument not null first thing in the function should enable
a sufficiently smart whole-program static checker to flag null
arguments.

But having such a static check right at compile-time would be much
better.  Could attribute nonnull do it?  If yes, do we still need the
assertion?

Re: [Qemu-devel] [PATCH] qemu-img create: add -o nocow option

2013-12-04 Thread Stefan Hajnoczi

On Mon, Dec 2, 2013 at 4:54 AM, Chunyan Liu cy...@suse.com wrote:
 2013/11/25 Kevin Wolf kw...@redhat.com

 Am 21.11.2013 um 09:51 hat Stefan Hajnoczi geschrieben:
  On Thu, Nov 21, 2013 at 11:33:56AM +0800, Chunyan Liu wrote:
   2013/11/20 Stefan Hajnoczi stefa...@gmail.com
  
On Wed, Nov 20, 2013 at 04:50:29PM +0800, Chunyan Liu wrote:
  block/cow.c   |   22 ++
  block/qcow.c  |   22 ++
  block/qcow2.c |   22 ++
   
I think you can avoid modifying all the image formats:
   
.bdrv_create() functions pass options to bdrv_create_file().
Therefore
an image format like qcow2 does not need to parse the nocow option
itself.  Only raw-posix.c:.bdrv_create() needs to know about the
nocow
option.
   
   
   In existing code, options passed to bdrv_create_file contains no
   option in
   fact.
  
   And if we pass all options to bdrv_create_file directly, raw-posix.c:
   .bdrv_create() will get NOCOW option but at the same time get SIZE
   option,
   it
   will create a file with total size. For cow/qcow/qcow2, I suppose it's
   not
   expected? In current code, bdrv_create_file will create a zero-sized
   image
   for
   cow/qcow/qcow2.
 
  I see what the problem is: the loop that parses options does options++.
  So by the time it has processed them the options pointer will be NULL or
  point to a terminator option (option-name == NULL).
 
  I was confused because there has been a patch series which changes
  .bdrv_create() option parsing.  But I forgot it hasn't been merged.
  https://lists.gnu.org/archive/html/qemu-devel/2013-08/msg01695.html
 
  Kevin: Have you looked at .bdrv_create() QEMUOptionParameter removal
  recently?  Otherwise I'm inclined to merge Chunyan's patch - we can
  always refactor the code later when QEMUOptionParameter is removed.

 No, I haven't. I think the QemuOpts conversion series was relatively
 close, but it seems nobody is working on it any more. Someone should
 probably pick it up and finish it.


 Hi, Kevin  Stefan,
 About the nocow option, according to current status, how should we proceed?
 Any
 further work needs to do?

We talked on IRC about rebasing the QEMUOptionParameter series.  Let's
get the in so the 'nocow' change can be made cleanly.

Stefan

[Qemu-devel] [PATCH V7 0/6] export internal snapshot by qemu-nbd

2013-12-04 Thread Wenchao Xia

This series allow user to read internal snapshot's contents without qemu-img
convert.

V2:
  Address Stefan's comments:
  02: add 'fall through' comments in the case statement.
  03: add doc about the difference of internal snapshot and backing chain
snapshot, which is used in previous '--snapshot' parameter.
  Other:
  01,04: rebased on upstream with conflict resolved.

v3:
  Address Paolo's comments:
  02: add parameter -l snapshot_id_or_name, rename options
snapshot-load to load-snapshot, use QemuOpts.
  03: rename snapshot-load to load-snapshot.
  04: related change to test both -l and -L case.
  05-07: add similar parameter for qemu-img convert.
  Other:
  01: foldered original snapshot logic into function
bdrv_snapshot_load_tmp_by_id_or_name(), since multiple
caller present in this version. Refined error message from
, reason: %s to : %s.
  02: Refined error message from , reason: %s to : %s.
  03: Rename PARAM to SNAPSHOT_PARAM.

v4:
  Address Eric's comments:
  01: typo fix.
  02: squashed patch. Use only one option -l and distiguish
the cases by the starting string.
  03: remove 'eval', remove '_supported_os Linux', remove the
comments that have typo.
  04: squashed patch. Add only one option -l and distiguish
the cases by the starting string.
  05: remove indentation.
  Address Paolo's comments:
  02: Use only one option -l and distiguish the cases by
the starting string.
  04: Use only one option -l and distiguish the cases by
the starting string, mark old -s as deprecated, added doc
for them.

v5:
  Address Jeff's comments:
  Quote image name in test cases.

v6:
  Address Kevin's comments:
  1: typo fix, remove device and snapshot info in error message.
  2: use strstart().
  3: use _require_command(), limit proto to file, since when proto=nbd
it can't work. also changed _require_command() to tip better.
  4: use strstart().
  6: new patch, since I found the doc missing in debugging.

v7:
  Address Stefan's comments:
  3: use unix socket and wait for 30s, to make the case reliable, folder
  4: add missing doc change.
  Other:
  3,5: add label method 1/method 2 in echo message, to tip better.

Wenchao Xia (6):
  1 snapshot: distinguish id and name in load_tmp
  2 qemu-nbd: support internal snapshot export
  3 qemu-iotests: add 058 internal snapshot export with qemu-nbd case
  4 qemu-img: add -l for snapshot in convert
  5 qemu-iotests: add test for snapshot in qemu-img convert
  6 qemu-nbd: add doc for option -f

 block/qcow2-snapshot.c   |   10 +++-
 block/qcow2.h|5 +-
 block/snapshot.c |   77 ++-
 include/block/block_int.h|4 +-
 include/block/snapshot.h |   15 -
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |   44 +++--
 qemu-img.texi|   12 +++-
 qemu-nbd.c   |   47 ++-
 qemu-nbd.texi|   10 +++-
 tests/qemu-iotests/058   |  138 ++
 tests/qemu-iotests/058.out   |   44 +
 tests/qemu-iotests/check |1 +
 tests/qemu-iotests/common.rc |3 +-
 tests/qemu-iotests/group |1 +
 15 files changed, 390 insertions(+), 25 deletions(-)
 create mode 100755 tests/qemu-iotests/058
 create mode 100644 tests/qemu-iotests/058.out

[Qemu-devel] [PATCH V7 2/6] qemu-nbd: support internal snapshot export

2013-12-04 Thread Wenchao Xia

Now it is possible to directly export an internal snapshot, which
can be used to probe the snapshot's contents without qemu-img
convert.

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
 block/snapshot.c |   18 ++
 include/block/snapshot.h |8 
 qemu-nbd.c   |   46 --
 qemu-nbd.texi|8 +++-
 4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index 565222e..9047f8d 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -25,6 +25,24 @@
 #include block/snapshot.h
 #include block/block_int.h
 
+QemuOptsList internal_snapshot_opts = {
+.name = snapshot,
+.head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head),
+.desc = {
+{
+.name = SNAPSHOT_OPT_ID,
+.type = QEMU_OPT_STRING,
+.help = snapshot id
+},{
+.name = SNAPSHOT_OPT_NAME,
+.type = QEMU_OPT_STRING,
+.help = snapshot name
+},{
+/* end of list */
+}
+},
+};
+
 int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name)
 {
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index d05bea7..770d9bb 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -27,6 +27,14 @@
 
 #include qemu-common.h
 #include qapi/error.h
+#include qemu/option.h
+
+
+#define SNAPSHOT_OPT_BASE   snapshot.
+#define SNAPSHOT_OPT_ID snapshot.id
+#define SNAPSHOT_OPT_NAME   snapshot.name
+
+extern QemuOptsList internal_snapshot_opts;
 
 typedef struct QEMUSnapshotInfo {
 char id_str[128]; /* unique snapshot id */
diff --git a/qemu-nbd.c b/qemu-nbd.c
index c26c98e..fe6053c 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -20,6 +20,7 @@
 #include block/block.h
 #include block/nbd.h
 #include qemu/main-loop.h
+#include block/snapshot.h
 
 #include stdarg.h
 #include stdio.h
@@ -79,7 +80,14 @@ static void usage(const char *name)
 \n
 Block device options:\n
   -r, --read-only  export read-only\n
-  -s, --snapshot   use snapshot file\n
+  -s, --snapshot   use FILE as an external snapshot, create a temporary\n
+   file with backing_file=FILE, redirect the write to\n
+   the temporary one\n
+  -l, --load-snapshot=SNAPSHOT_PARAM\n
+   load an internal snapshot inside FILE and export it\n
+   as an read-only device, SNAPSHOT_PARAM format is\n
+   'snapshot.id=[ID],snapshot.name=[NAME]', or\n
+   '[ID_OR_NAME]'\n
   -n, --nocachedisable host cache\n
   --cache=MODE set cache mode (none, writeback, ...)\n
 #ifdef CONFIG_LINUX_AIO
@@ -315,7 +323,9 @@ int main(int argc, char **argv)
 char *device = NULL;
 int port = NBD_DEFAULT_PORT;
 off_t fd_size;
-const char *sopt = hVb:o:p:rsnP:c:dvk:e:f:t;
+QemuOpts *sn_opts = NULL;
+const char *sn_id_or_name = NULL;
+const char *sopt = hVb:o:p:rsnP:c:dvk:e:f:tl:;
 struct option lopt[] = {
 { help, 0, NULL, 'h' },
 { version, 0, NULL, 'V' },
@@ -328,6 +338,7 @@ int main(int argc, char **argv)
 { connect, 1, NULL, 'c' },
 { disconnect, 0, NULL, 'd' },
 { snapshot, 0, NULL, 's' },
+{ load-snapshot, 1, NULL, 'l' },
 { nocache, 0, NULL, 'n' },
 { cache, 1, NULL, QEMU_NBD_OPT_CACHE },
 #ifdef CONFIG_LINUX_AIO
@@ -428,6 +439,17 @@ int main(int argc, char **argv)
 errx(EXIT_FAILURE, Offset must be positive `%s', optarg);
 }
 break;
+case 'l':
+if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
+sn_opts = qemu_opts_parse(internal_snapshot_opts, optarg, 0);
+if (!sn_opts) {
+errx(EXIT_FAILURE, Failed in parsing snapshot param `%s',
+ optarg);
+}
+} else {
+sn_id_or_name = optarg;
+}
+/* fall through */
 case 'r':
 nbdflags |= NBD_FLAG_READ_ONLY;
 flags = ~BDRV_O_RDWR;
@@ -581,6 +603,22 @@ int main(int argc, char **argv)
 error_get_pretty(local_err));
 }
 
+if (sn_opts) {
+ret = bdrv_snapshot_load_tmp(bs,
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
+ qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
+ local_err);
+} else if (sn_id_or_name) {
+ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
+   local_err);
+}
+if (ret  0) {
+errno = -ret;
+err(EXIT_FAILURE,
+Failed to load snapshot: %s,
+error_get_pretty(local_err));
+}
+
 fd_size = bdrv_getlength(bs);

Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP

2013-12-04 Thread Markus Armbruster

Paolo Bonzini pbonz...@redhat.com writes:

 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 Normally the user is expected to eject DVD if it is not locked by
 the guest. eject_device() makes few checks and calls bdrv_close()
 if DVD is not in use.
 
 However it is still possible to eject DVD even if it is in use.
 For that, QEMU sets eject requested flag, the guest reads it, issues
 ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case,
 bdrv_close() is not called anywhere so it remains inserted in QEMU's
 terms.

 This is expected behavior, and matches what IDE does.

 Markus, can you confirm?

Confirmed.  See commit 4be9762.

Alexey, monitor commands eject does two things: it first opens the tray,
and if that works, it removes the medium.

If the tray is locked closed, it tells the device model that eject was
requested.  Works just like the physical eject button.

With -f, it then rips out the medium.  This is similar to opening the
tray with a unbent paperclip.  Let's ignore this case.

The scsi-cd device model tells the guest about the eject request.  A
well-behaved guest will then command the device to unlock and open the
tray.

The guest uses the same commands on behalf of its applications,
e.g. /usr/bin/eject.

Your patch changes behavior of eject /dev/sr0  eject -t /dev/sr0:
you no longer get the same medium back.  You normally do with real
hardware.

The somewhat unfortunate consequence is that monitor command eject can
only remove the medium when the tray is not locked.

Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL

2013-12-04 Thread Markus Armbruster

Paolo Bonzini pbonz...@redhat.com writes:

 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 This prevents the guest from preventing DVD medium removal when
 there is no medium.
 
 Without this, if the user has ejected a DVD, it is possible to
 have a block device with an open tray, no media but locked.

 I need to check this against a real disk.  Thanks!

If I remember correctly, I did, and it works.  You can still close the
open tray in this state.

[Qemu-devel] [PATCH V7 6/6] qemu-nbd: add doc for option -f

2013-12-04 Thread Wenchao Xia

Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com
---
 qemu-nbd.c|1 +
 qemu-nbd.texi |2 ++
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index fe6053c..136e8c9 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -79,6 +79,7 @@ static void usage(const char *name)
 #endif
 \n
 Block device options:\n
+  -f, --format=FORMAT  set image format (raw, qcow2, ...)\n
   -r, --read-only  export read-only\n
   -s, --snapshot   use FILE as an external snapshot, create a temporary\n
file with backing_file=FILE, redirect the write to\n
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index 5b55f76..0a7e013 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -22,6 +22,8 @@ Export QEMU disk image using NBD protocol.
   interface to bind to (default @samp{0.0.0.0})
 @item -k, --socket=@var{path}
   Use a unix socket with path @var{path}
+@item -f, --format=@var{format}
+  Set image format as @var{format}
 @item -r, --read-only
   export read-only
 @item -P, --partition=@var{num}
-- 
1.7.1

Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default

2013-12-04 Thread Stefan Hajnoczi

On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote:
  Developers will only be happy with seccomp if it's easy and rewarding to
  support/debug.
 
 Agreed.
 
 As a developer, how do you feel about the audit/syslog based approach I 
 mentioned earlier?

I used the commands you posted (I think that's what you mean).  They
produce useful output.

The problem is that without an error message on stderr or from the
shell, no one will think QEMU process dead and hung == check seccomp
immediately.  It's frustrating to deal with a silent failure.

Stefan

Re: [Qemu-devel] [RFC PATCH 03/21] target-arm: adjust TTBCR for TrustZone feature

2013-12-04 Thread Fedorov Sergey



On 12/03/2013 04:15 PM, Peter Crosthwaite wrote:

On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote:

TTBCR has additional fields PD0 and PD1 when using Short-descriptor
translation table format on a CPU with TrustZone feature support.

Signed-off-by: Sergey Fedorov s.fedo...@samsung.com
---
  target-arm/helper.c |4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index a247ca0..6642e53 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1159,8 +1159,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
  {
  int maskshift = extract32(value, 0, 3);

-if (arm_feature(env, ARM_FEATURE_LPAE)) {
+if (arm_feature(env, ARM_FEATURE_LPAE)  (value  (1  31))) {

This appears to be changing more than just trustzone dependent
behavior. That is, if we take just this hunk and ignore the one below
you see a change in the non-tz behaviour. Is the hunk legitimate
irrespective of trustzone support?


Yes, current implementation is not accurate according to ARMv7-AR 
reference manual. See B4.1.153 TTBCR, Translation Table Base Control 
Register, VMSA | TTBCR format when using the Long-descriptor translation 
table format. When LPAE feature is supported, EAE, bit[31] selects 
translation descriptor format and, therefore, TTBCR format.





  value = ~((7  19) | (3  14) | (0xf  3));
+} else if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+value = 0x37;
  } else {
  value = 7;
  }

There are a few magic numbers in the patch probably worth macrofiying.


As I can see, magic numbers are widely used through all of this file to 
represent CP register fields and other things. Maybe the macrofying 
should be done separately from this patch series?




Regards,
Peter


--
1.7.9.5






Best regards,
Sergey Fedorov

Re: [Qemu-devel] [RFC PATCH 04/21] target-arm: preserve RAO/WI bits of ARMv7 SCTLR

2013-12-04 Thread Fedorov Sergey



On 12/03/2013 04:17 PM, Peter Crosthwaite wrote:

On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote:

From: Svetlana Fedoseeva s.fedose...@samsung.com

Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com
Signed-off-by: Sergey Fedorov s.fedo...@samsung.com
---
  target-arm/helper.c |4 
  1 file changed, 4 insertions(+)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 6642e53..d7922ad 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1507,6 +1507,10 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {

  static int sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t 
value)
  {
+if (arm_feature(env, ARM_FEATURE_V7)) {
+value = value | 0x00c50078; /* This bits are RAO/WI */

Magic number. these bits .


Would be acceptable to substitute this magic number with bitshifted 
constants combined with bitwise or, e.g. as in vmsa_ttbcr_raw_write()?





+}
+
  env-cp15.c1_sys = value;
  /* ??? Lots of these bits are not implemented.  */
  /* This may enable/disable the MMU, so do a TLB flush.  */
--
1.7.9.5






--
Best regards,
Sergey Fedorov, Junior Software Engineer,
Samsung RD Institute Rus.
E-mail: s.fedo...@samsung.com

Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode

2013-12-04 Thread Fedorov Sergey



On 12/03/2013 04:51 PM, Peter Maydell wrote:

On 3 December 2013 12:20, Peter Crosthwaite
peter.crosthwa...@xilinx.com wrote:

On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com wrote:

From: Svetlana Fedoseeva s.fedose...@samsung.com

Define CPU monitor mode. Adjust core registers banking. Adjust CPU VM
state info. Provide CPU mode name for monitor mode.

Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com
Signed-off-by: Sergey Fedorov s.fedo...@samsung.com
---
  target-arm/cpu.h   |7 ---
  target-arm/helper.c|3 +++
  target-arm/machine.c   |   12 ++--
  target-arm/translate.c |2 +-
  4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 0b93e39..94d8bd1 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -124,9 +124,9 @@ typedef struct CPUARMState {
  uint32_t spsr;

  /* Banked registers.  */
-uint32_t banked_spsr[6];
-uint32_t banked_r13[6];
-uint32_t banked_r14[6];
+uint32_t banked_spsr[7];
+uint32_t banked_r13[7];
+uint32_t banked_r14[7];


Are there any more modes yet to be implemented? It might save on
future VMSD version bumps if we just pad this out to its ultimate
value now.

The remaining mode defined for AArch32 which we don't
implement yet is Hyp mode, which has a banked R13 and SPSR,
but not a banked LR.

-- PMM




So should a number of banked core registers be increased more? 
Personally, I'd like to keep this patch only TZ-related.


Best regards,
Sergey Fedorov

[Qemu-devel] [PATCH v2] qcow2: Zero-initialise first cluster for new images

2013-12-04 Thread Kevin Wolf

Strictly speaking, this is only required for has_zero_init() == false,
but it's easy enough to just do a cluster-aligned write that is padded
with zeros after the header.

This fixes that after 'qemu-img create' header extensions are attempted
to be parsed that are really just random leftover data.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Kevin Wolf kw...@redhat.com
Reviewed-by: Fam Zheng f...@redhat.com
Reviewed-by: Paolo Bonzini pbonz...@redhat.com
---
 block/qcow2.c | 36 
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6e5d98d..a205f31 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1471,7 +1471,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
  * size for any qcow2 image.
  */
 BlockDriverState* bs;
-QCowHeader header;
+QCowHeader *header;
 uint8_t* refcount_table;
 Error *local_err = NULL;
 int ret;
@@ -1489,30 +1489,34 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 }
 
 /* Write the header */
-memset(header, 0, sizeof(header));
-header.magic = cpu_to_be32(QCOW_MAGIC);
-header.version = cpu_to_be32(version);
-header.cluster_bits = cpu_to_be32(cluster_bits);
-header.size = cpu_to_be64(0);
-header.l1_table_offset = cpu_to_be64(0);
-header.l1_size = cpu_to_be32(0);
-header.refcount_table_offset = cpu_to_be64(cluster_size);
-header.refcount_table_clusters = cpu_to_be32(1);
-header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
-header.header_length = cpu_to_be32(sizeof(header));
+QEMU_BUILD_BUG_ON((1  MIN_CLUSTER_BITS)  sizeof(*header));
+header = g_malloc0(cluster_size);
+*header = (QCowHeader) {
+.magic  = cpu_to_be32(QCOW_MAGIC),
+.version= cpu_to_be32(version),
+.cluster_bits   = cpu_to_be32(cluster_bits),
+.size   = cpu_to_be64(0),
+.l1_table_offset= cpu_to_be64(0),
+.l1_size= cpu_to_be32(0),
+.refcount_table_offset  = cpu_to_be64(cluster_size),
+.refcount_table_clusters= cpu_to_be32(1),
+.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
+.header_length  = cpu_to_be32(sizeof(*header)),
+};
 
 if (flags  BLOCK_FLAG_ENCRYPT) {
-header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+header-crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
 } else {
-header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+header-crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
 }
 
 if (flags  BLOCK_FLAG_LAZY_REFCOUNTS) {
-header.compatible_features |=
+header-compatible_features |=
 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
 }
 
-ret = bdrv_pwrite(bs, 0, header, sizeof(header));
+ret = bdrv_pwrite(bs, 0, header, cluster_size);
+g_free(header);
 if (ret  0) {
 error_setg_errno(errp, -ret, Could not write qcow2 header);
 goto out;
-- 
1.8.1.4

Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support

2013-12-04 Thread Fedorov Sergey


On 12/03/2013 12:48 PM, Sergey Fedorov wrote:

This patch set implements a basic support of CPU core TrustZone feature. The
following major functionalities are implemented:
   * CPU monitor mode
   * Separate code translation for each secure state
   * CPACR  NSACR co-processor access control
   * Separate TLB for each secure state
   * Co-processor register banking
   * SMC instruction
   * FIQ/IRQ routing to monitor mode

There is no support for banked co-processor register migration, save/load its
VM state yet. That is an open question how to implement this functionality. Any
suggestions is greatly appreciated.

This patch set is a request for comments for the proof of concept.

Sergey Fedorov (18):
   target-arm: move SCR  VBAR into TrustZone register list
   target-arm: adjust TTBCR for TrustZone feature
   target-arm: add arm_is_secure() helper
   target-arm: reject switching to monitor mode from non-secure state
   target-arm: adjust arm_current_pl() for TrustZone
   target-arm: adjust SCR CP15 register access rights
   target-arm: add non-secure Translation Block flag
   target-arm: implement CPACR register logic
   target-arm: add NSACR support
   target-arm: add SDER definition
   target-arm: split TLB for secure state
   target-arm: add banked coprocessor register type
   target-arm: convert appropriate coprocessor registers to banked type
   target-arm: use c13_context field for CONTEXTIDR
   target-arm: switch banked CP registers
   target-arm: add MVBAR support
   target-arm: implement SMC instruction
   target-arm: implement IRQ/FIQ routing to Monitor mode

Svetlana Fedoseeva (3):
   target-arm: add TrustZone CPU feature
   target-arm: preserve RAO/WI bits of ARMv7 SCTLR
   target-arm: add CPU Monitor mode

  target-arm/cpu.c   |6 +-
  target-arm/cpu.h   |  126 
  target-arm/helper.c|  308 +-
  target-arm/machine.c   |   12 +-
  target-arm/translate.c |  388 ++--
  target-arm/translate.h |2 +
  6 files changed, 585 insertions(+), 257 deletions(-)



We'd like this patch series finally to be merged into mainstream. What 
should be done to achieve this goal?


Best regards,
Sergey Fedorov

Re: [Qemu-devel] [PATCH v3] block: Close backing file early in bdrv_img_create

2013-12-04 Thread Stefan Hajnoczi

On Tue, Dec 03, 2013 at 02:57:52PM +0100, Max Reitz wrote:
 Leaving the backing file open although it is not needed anymore can
 cause problems if it is opened through a block driver which allows
 exclusive access only and if the create function of the block driver
 used for the top image (the one being created) tries to close and reopen
 the image file (which will include opening the backing file a second
 time).
 
 In particular, this will happen with a backing file opened through
 qemu-nbd and using qcow2 as the top image file format (which reopens the
 image to flush it to disk).
 
 In addition, the BlockDriverState in bdrv_img_create() is used for the
 backing file only; it should therefore be made local to the respective
 block.
 
 Signed-off-by: Max Reitz mre...@redhat.com
 ---
 v3:
  - Reverted the revert in v2 and limited the BlockDriverState once again
to the scope its used in (addressing Xia's comment); added a missing
bdrv_unref() in contrast to v1 (addressing Kevin's comment to v1)
 
 v2:
  - Minimizing the changes prevents introducing a leak of the
BlockDriverState in case of an error in bdrv_open() (thanks, Kevin).
 ---
  block.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan

Re: [Qemu-devel] [Bug 1257334] [NEW] diffuse handling of image creation from another path

2013-12-04 Thread Stefan Hajnoczi

On Tue, Dec 03, 2013 at 03:22:28PM -, Florian Miksch wrote:
Hi,
This does not look like a bug.  Please see the explanation below...

Inlining Florian's reproducer script so we can discuss it via email:

 #!/bin/bash
 # This script does create a backing image and overlay to
 # reproduce the reported bug.
 #...
  
 mkdir a
 # Create an image from another path e.g. in the directory 'a'
 qemu-img create -f qcow2 a/blob.img 10G
  
 # Create an overlay image from another path in the same directory
 qemu-img create -f qcow2 -b a/blob.img a/ovl.img
  
 # Get Info in the new directory
 cd a
 qemu-img info ovl.img
 # Output:
 #   image: ovl.img
 #   file format: qcow2
 #   virtual size: 10G (10737418240 bytes)
 #   disk size: 196K
 #   cluster_size: 65536
 #   backing file: a/blob.img
  
 # Get the Info from another directory
 cd ..
 qemu-img info a/ovl.img
 # Output:
 #   image: a/ovl.img
 #   file format: qcow2
 #   virtual size: 10G (10737418240 bytes)
 #   disk size: 196K
 #   cluster_size: 65536
 #   backing file: a/blob.img (actual path: a/a/blob.img)
  
 # Bug:
 #   Compare 'image'
 #   Compare 'backing file'
 #   Look at 'actual path'

The behavior you are showing here is explained as follows:

Backing file paths can be relative or absolute and are stored inside
the image file.  In this case you are providing a relative backing
file path.

Relative backing file paths are interpreted against the image filename.
In other words:

  join_path(dirname('a/ovl.img'), 'a/blob.img') - 'a/a/blob.img'

  
 # 'qemu-img info' takes the recommended path as name of the image
 # and the actual path is then: a/a/blob.img
  
 qemu-img commit a/ovl.img
 # Now commit fails  
 # -- ERROR No such file or directory

The problem was:
 qemu-img create -f qcow2 -b a/blob.img a/ovl.img

If you want to use a relative backing file path, remember that it is
relative to the image file:

cd a  qemu-img create -f qcow2 -b blob.img ovl.img

Then 'qemu-img commit a/ovl.img' will work as expected.

Stefan

Re: [Qemu-devel] [PATCH v2] qcow2: Zero-initialise first cluster for new images

2013-12-04 Thread Stefan Hajnoczi

On Wed, Dec 04, 2013 at 11:06:36AM +0100, Kevin Wolf wrote:
 Strictly speaking, this is only required for has_zero_init() == false,
 but it's easy enough to just do a cluster-aligned write that is padded
 with zeros after the header.
 
 This fixes that after 'qemu-img create' header extensions are attempted
 to be parsed that are really just random leftover data.
 
 Cc: qemu-sta...@nongnu.org
 Signed-off-by: Kevin Wolf kw...@redhat.com
 Reviewed-by: Fam Zheng f...@redhat.com
 Reviewed-by: Paolo Bonzini pbonz...@redhat.com
 ---
  block/qcow2.c | 36 
  1 file changed, 20 insertions(+), 16 deletions(-)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan

Re: [Qemu-devel] [RFC PATCH 03/21] target-arm: adjust TTBCR for TrustZone feature

2013-12-04 Thread Peter Crosthwaite

On Wed, Dec 4, 2013 at 7:50 PM, Fedorov Sergey s.fedo...@samsung.com wrote:

 On 12/03/2013 04:15 PM, Peter Crosthwaite wrote:

 On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com
 wrote:

 TTBCR has additional fields PD0 and PD1 when using Short-descriptor
 translation table format on a CPU with TrustZone feature support.

 Signed-off-by: Sergey Fedorov s.fedo...@samsung.com
 ---
   target-arm/helper.c |4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/target-arm/helper.c b/target-arm/helper.c
 index a247ca0..6642e53 100644
 --- a/target-arm/helper.c
 +++ b/target-arm/helper.c
 @@ -1159,8 +1159,10 @@ static int vmsa_ttbcr_raw_write(CPUARMState *env,
 const ARMCPRegInfo *ri,
   {
   int maskshift = extract32(value, 0, 3);

 -if (arm_feature(env, ARM_FEATURE_LPAE)) {
 +if (arm_feature(env, ARM_FEATURE_LPAE)  (value  (1  31))) {

 This appears to be changing more than just trustzone dependent
 behavior. That is, if we take just this hunk and ignore the one below
 you see a change in the non-tz behaviour. Is the hunk legitimate
 irrespective of trustzone support?


 Yes, current implementation is not accurate according to ARMv7-AR reference
 manual. See B4.1.153 TTBCR, Translation Table Base Control Register, VMSA |
 TTBCR format when using the Long-descriptor translation table format. When
 LPAE feature is supported, EAE, bit[31] selects translation descriptor
 format and, therefore, TTBCR format.


Ok,

You should probably split it off as a separate patch. And you could
send probably send it immediately. What you just wrote is a nice
commit message.



   value = ~((7  19) | (3  14) | (0xf  3));
 +} else if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
 +value = 0x37;
   } else {
   value = 7;
   }

 There are a few magic numbers in the patch probably worth macrofiying.


 As I can see, magic numbers are widely used through all of this file to
 represent CP register fields and other things. Maybe the macrofying should
 be done separately from this patch series?


So target-arm is riddled with legacy style. Converting all of
target-arm to self-doccing macros is a big job, but if we keep
expanding without doing it that job only gets bigger. I'll defer to
PMM on what we want to policy to be going forward. - any thoughts?

Regards,
Peter


 Regards,
 Peter

 --
 1.7.9.5




 Best regards,
 Sergey Fedorov

Re: [Qemu-devel] [PATCH 00/23] qemu state loading issues

2013-12-04 Thread Michael S. Tsirkin

On Tue, Dec 03, 2013 at 06:24:24PM +, Peter Maydell wrote:
 On 3 December 2013 16:28, Michael S. Tsirkin m...@redhat.com wrote:
For example:
 
https://bugzilla.redhat.com/show_bug.cgi?id=588133#c8
https://bugzilla.redhat.com/show_bug.cgi?id=588133#c9
 
 I get not authorized errors on both of those.

Oh, sorry :(
You'll have to take my word on it that what happened there
was that a bug reporter saved state and suggested that
developer attempt to load it to reproduce the bug.

  This patchset is the result of that audit: it addresses this set of
  security issues by adding input validation and failing migration on
  invalid input.
 
 I notice that some but not all of these patches have CVE numbers
 in the commit messages -- what's the distinction that meant some
 of them get CVEs and some don't?
 
 thanks
 -- PMM

The one that does not have a CVE is 23/23, this is
a NULL pointer dereference on invalid input, which
is not nice but probably not exploitable.

-- 
MST

Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode

2013-12-04 Thread Peter Crosthwaite

On Wed, Dec 4, 2013 at 8:01 PM, Fedorov Sergey s.fedo...@samsung.com wrote:

 On 12/03/2013 04:51 PM, Peter Maydell wrote:

 On 3 December 2013 12:20, Peter Crosthwaite
 peter.crosthwa...@xilinx.com wrote:

 On Tue, Dec 3, 2013 at 6:48 PM, Sergey Fedorov s.fedo...@samsung.com
 wrote:

 From: Svetlana Fedoseeva s.fedose...@samsung.com

 Define CPU monitor mode. Adjust core registers banking. Adjust CPU VM
 state info. Provide CPU mode name for monitor mode.

 Signed-off-by: Svetlana Fedoseeva s.fedose...@samsung.com
 Signed-off-by: Sergey Fedorov s.fedo...@samsung.com
 ---
   target-arm/cpu.h   |7 ---
   target-arm/helper.c|3 +++
   target-arm/machine.c   |   12 ++--
   target-arm/translate.c |2 +-
   4 files changed, 14 insertions(+), 10 deletions(-)

 diff --git a/target-arm/cpu.h b/target-arm/cpu.h
 index 0b93e39..94d8bd1 100644
 --- a/target-arm/cpu.h
 +++ b/target-arm/cpu.h
 @@ -124,9 +124,9 @@ typedef struct CPUARMState {
   uint32_t spsr;

   /* Banked registers.  */
 -uint32_t banked_spsr[6];
 -uint32_t banked_r13[6];
 -uint32_t banked_r14[6];
 +uint32_t banked_spsr[7];
 +uint32_t banked_r13[7];
 +uint32_t banked_r14[7];

 Are there any more modes yet to be implemented? It might save on
 future VMSD version bumps if we just pad this out to its ultimate
 value now.

 The remaining mode defined for AArch32 which we don't
 implement yet is Hyp mode, which has a banked R13 and SPSR,
 but not a banked LR.

 -- PMM



 So should a number of banked core registers be increased more? Personally,
 I'd like to keep this patch only TZ-related.


So what im proposing is just a slightly more general patch. Is it
really any more complicated than just applying your change pattern for
the hyp mode? Patch subject would be something like:

target-arm: Add all remaining missing modes

The motiviation is less VMSD version bumps in ARM CPU (a place where I
expect assume such version bumps to be considerable annoyance).

Regards,
Peter

 Best regards,
 Sergey Fedorov

Re: [Qemu-devel] [PATCH 12/12] target-arm: A64: add support for compare and branch imm

2013-12-04 Thread Peter Maydell

On 4 December 2013 00:48, Richard Henderson r...@twiddle.net wrote:
 On 12/04/2013 01:32 PM, Peter Maydell wrote:
 You're right that we can just make this function return the TCGv
 temp rather than making the caller pass one in. Are you suggesting
 the 64-bit case should return cpu_X[reg] rather than a copy of it,
 though? I think it would be pretty hard to reason about if you had to
 remember that sometimes you got a trashable copy and sometimes
 the real register.

 I think that the normal case is to write to the output in one step, and thus
 having an input overlap an output isn't your problem but tcg's.  I would think
 the case of multi-step output to be fairly rare, and easily solvable by always
 allocating an output temporary.

Does a copy to temp actually cost us anything, though? I was
under the impression the TCG optimizer would basically throw
it away again.

I suppose you could just treat the semantics of it as assume you
can't trash this TCGv in all cases and rely on the auto-free.

 Am I off-base on the multi-output thing?  Modulo flags setting, of course, but
 I think that special case ought to be relatively solvable.

 (I'm still a bit on the fence about that pool of auto-freeing temporaries.
 Manual temp management is certainly a fertile source of decoder bugs,
 but in the longer term we might want to push the functionality down into
 the common code rather than having an ad-hoc thing in one decoder.)

 See also Sparc and S390.  ;-)

Ah, I hadn't noticed we were already doing this in other frontends.

-- PMM

Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support

2013-12-04 Thread Peter Crosthwaite

Hi Sergey,

On Wed, Dec 4, 2013 at 8:08 PM, Fedorov Sergey s.fedo...@samsung.com wrote:
 On 12/03/2013 12:48 PM, Sergey Fedorov wrote:

 This patch set implements a basic support of CPU core TrustZone feature.
 The
 following major functionalities are implemented:
* CPU monitor mode
* Separate code translation for each secure state
* CPACR  NSACR co-processor access control
* Separate TLB for each secure state
* Co-processor register banking
* SMC instruction
* FIQ/IRQ routing to monitor mode

 There is no support for banked co-processor register migration, save/load
 its
 VM state yet. That is an open question how to implement this
 functionality. Any
 suggestions is greatly appreciated.

 This patch set is a request for comments for the proof of concept.

 Sergey Fedorov (18):
target-arm: move SCR  VBAR into TrustZone register list
target-arm: adjust TTBCR for TrustZone feature
target-arm: add arm_is_secure() helper
target-arm: reject switching to monitor mode from non-secure state
target-arm: adjust arm_current_pl() for TrustZone
target-arm: adjust SCR CP15 register access rights
target-arm: add non-secure Translation Block flag
target-arm: implement CPACR register logic
target-arm: add NSACR support
target-arm: add SDER definition
target-arm: split TLB for secure state
target-arm: add banked coprocessor register type
target-arm: convert appropriate coprocessor registers to banked type
target-arm: use c13_context field for CONTEXTIDR
target-arm: switch banked CP registers
target-arm: add MVBAR support
target-arm: implement SMC instruction
target-arm: implement IRQ/FIQ routing to Monitor mode

 Svetlana Fedoseeva (3):
target-arm: add TrustZone CPU feature
target-arm: preserve RAO/WI bits of ARMv7 SCTLR
target-arm: add CPU Monitor mode

   target-arm/cpu.c   |6 +-
   target-arm/cpu.h   |  126 
   target-arm/helper.c|  308 +-
   target-arm/machine.c   |   12 +-
   target-arm/translate.c |  388
 ++--
   target-arm/translate.h |2 +
   6 files changed, 585 insertions(+), 257 deletions(-)


 We'd like this patch series finally to be merged into mainstream. What
 should be done to achieve this goal?


Send patch series' just like this and either action or challenge
reviewer comments (as you have already done). Next time, you can drop
the RFC, that means you intend the series for review+merge. If you get
no reply after a week ping the series.

I'm about half way through the series on my first pass. And I guess
PMM has had a look as he has commented on one my comments.

A 21 patch series with +500/-250 is long, and queue maintainers are
generally hesitant to take partial series (as its often contrary to
author intention). Things generally go faster with smaller series. You
have some low-complexity cleanup type patches that dont add new
features than you could potentially send as a smaller initial series
to get upstream quickly, then take more time on the harder stuff as
follow up series with reduced patch count.

Regards,
Peter

 Best regards,
 Sergey Fedorov

Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support

2013-12-04 Thread Peter Maydell

On 4 December 2013 10:08, Fedorov Sergey s.fedo...@samsung.com wrote:
 On 12/03/2013 12:48 PM, Sergey Fedorov wrote:

 This patch set implements a basic support of CPU core TrustZone feature.

 We'd like this patch series finally to be merged into mainstream. What
 should be done to achieve this goal?

I'd like to see TZ support in mainline too. The high level answer
is that it needs to get code reviewed, and you need to fix issues
that are raised in code review. Unfortunately my review queue is
currently pretty full (it has Allwinner board support, DIGIC board
support, a bunch of Cadence fixes, ARMv8 32 bit new instructions
and the A64 64 bit instruction support in it, all of which are fairly
big patchsets), so it may take me a little while to get to this
patchset. It is on my todo list though, so it won't get forgotten :-)

thanks
-- PMM

Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode

2013-12-04 Thread Peter Maydell

On 4 December 2013 10:58, Peter Crosthwaite
peter.crosthwa...@xilinx.com wrote:
 So what im proposing is just a slightly more general patch. Is it
 really any more complicated than just applying your change pattern for
 the hyp mode?

I think it would be, because of the wrinkle that hyp mode doesn't
have a banked LR, so the existing assume we can just translate
the mode into a single index good for both LR and SP logic
would break.

The minimal change if we wanted to keep VMSD bumps to a
minimum would be to increase the size of the banked_spsr[]
and banked_r13[] arrays to allow for Hyp mode but do nothing
else (except add a comment about it I guess).

 The motiviation is less VMSD version bumps in ARM CPU (a place where I
 expect assume such version bumps to be considerable annoyance).

Well, they're only a problem at the point where we start trying
to support cross-version migration; we're not at that point yet...

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2 2/2] target-i386: Intel MPX

2013-12-04 Thread Liu, Jinsong

 
 Almost there.  Migration (vmstate) is still missing.
 

Like this:

==
From faead85c0dbe62da896e0ed9e165d98e10216968 Mon Sep 17 00:00:00 2001
From: Liu Jinsong jinsong@intel.com
Date: Wed, 4 Dec 2013 16:56:49 +0800
Subject: [PATCH 2/2] target-i386: Intel MPX

Add some MPX related definiation, and hardcode sizes and offsets
of xsave features 3 and 4. It also add corresponding part to
kvm_get/put_xsave, and vmstate.

Signed-off-by: Liu Jinsong jinsong@intel.com
---
 target-i386/cpu.c |4 
 target-i386/cpu.h |   22 +++---
 target-i386/kvm.c |   10 ++
 target-i386/machine.c |   32 
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 544b57f..52ca029 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -336,6 +336,10 @@ typedef struct ExtSaveArea {
 static const ExtSaveArea ext_save_areas[] = {
 [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX,
 .offset = 0x240, .size = 0x100 },
+[3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
+.offset = 0x3c0, .size = 0x40  },
+[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
+.offset = 0x400, .size = 0x10  },
 };
 
 const char *get_register_name_32(unsigned int reg)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index ea373e8..5c1dd17 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -380,9 +380,12 @@
 
 #define MSR_VM_HSAVE_PA 0xc0010117
 
-#define XSTATE_FP   1
-#define XSTATE_SSE  2
-#define XSTATE_YMM  4
+#define XSTATE_FP   (1ULL  0)
+#define XSTATE_SSE  (1ULL  1)
+#define XSTATE_YMM  (1ULL  2)
+#define XSTATE_BNDREGS  (1ULL  3)
+#define XSTATE_BNDCSR   (1ULL  4)
+
 
 /* CPUID feature words */
 typedef enum FeatureWord {
@@ -545,6 +548,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EBX_ERMS (1  9)
 #define CPUID_7_0_EBX_INVPCID  (1  10)
 #define CPUID_7_0_EBX_RTM  (1  11)
+#define CPUID_7_0_EBX_MPX  (1  14)
 #define CPUID_7_0_EBX_RDSEED   (1  18)
 #define CPUID_7_0_EBX_ADX  (1  19)
 #define CPUID_7_0_EBX_SMAP (1  20)
@@ -695,6 +699,16 @@ typedef union {
 uint64_t q;
 } MMXReg;
 
+typedef struct BNDReg {
+uint64_t lb;
+uint64_t ub;
+} BNDReg;
+
+typedef struct BNDCSReg {
+uint64_t cfg;
+uint64_t sts;
+} BNDCSReg;
+
 #ifdef HOST_WORDS_BIGENDIAN
 #define XMM_B(n) _b[15 - (n)]
 #define XMM_W(n) _w[7 - (n)]
@@ -912,6 +926,8 @@ typedef struct CPUX86State {
 
 uint64_t xstate_bv;
 XMMReg ymmh_regs[CPU_NB_REGS];
+BNDReg bnd_regs[4];
+BNDCSReg bndcs_regs;
 
 uint64_t xcr0;
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 749aa09..347d3d3 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -980,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu)
 #define XSAVE_XMM_SPACE   40
 #define XSAVE_XSTATE_BV   128
 #define XSAVE_YMMH_SPACE  144
+#define XSAVE_BNDREGS 240
+#define XSAVE_BNDCSR  256
 
 static int kvm_put_xsave(X86CPU *cpu)
 {
@@ -1012,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu)
 *(uint64_t *)xsave-region[XSAVE_XSTATE_BV] = env-xstate_bv;
 memcpy(xsave-region[XSAVE_YMMH_SPACE], env-ymmh_regs,
 sizeof env-ymmh_regs);
+memcpy(xsave-region[XSAVE_BNDREGS], env-bnd_regs,
+sizeof env-bnd_regs);
+memcpy(xsave-region[XSAVE_BNDCSR], env-bndcs_regs,
+sizeof(env-bndcs_regs));
 r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
 return r;
 }
@@ -1294,6 +1300,10 @@ static int kvm_get_xsave(X86CPU *cpu)
 env-xstate_bv = *(uint64_t *)xsave-region[XSAVE_XSTATE_BV];
 memcpy(env-ymmh_regs, xsave-region[XSAVE_YMMH_SPACE],
 sizeof env-ymmh_regs);
+memcpy(env-bnd_regs, xsave-region[XSAVE_BNDREGS],
+sizeof env-bnd_regs);
+memcpy(env-bndcs_regs, xsave-region[XSAVE_BNDCSR],
+sizeof(env-bndcs_regs));
 return 0;
 }
 
diff --git a/target-i386/machine.c b/target-i386/machine.c
index e568da2..ca8be7d 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -63,6 +63,36 @@ static const VMStateDescription vmstate_ymmh_reg = {
 #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) 
\
 VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
 
+static const VMStateDescription vmstate_bnd_regs = {
+.name = bnd_regs,
+.version_id = 1,
+.minimum_version_id = 1,
+.minimum_version_id_old = 1,
+.fields  = (VMStateField []) {
+VMSTATE_UINT64(lb, BNDReg),
+VMSTATE_UINT64(ub, BNDReg),
+VMSTATE_END_OF_LIST()
+}
+};
+
+#define VMSTATE_BNDREG_VARS(_field, _state, _n, _v)  \
+VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_bnd_regs, BNDReg)
+
+static const

Re: [Qemu-devel] [PATCH v8 4/5] hw/arm: add allwinner a10 SoC support

2013-12-04 Thread Peter Crosthwaite

On Wed, Dec 4, 2013 at 6:09 PM, liguang lig.f...@cn.fujitsu.com wrote:
 Signed-off-by: liguang lig.f...@cn.fujitsu.com
 ---
  hw/arm/Makefile.objs   |2 +-
  hw/arm/allwinner-a10.c |   77 
 
  include/hw/arm/allwinner-a10.h |   36 ++
  3 files changed, 114 insertions(+), 1 deletions(-)
  create mode 100644 hw/arm/allwinner-a10.c
  create mode 100644 include/hw/arm/allwinner-a10.h

 diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
 index 3671b42..b9e5983 100644
 --- a/hw/arm/Makefile.objs
 +++ b/hw/arm/Makefile.objs
 @@ -4,4 +4,4 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
  obj-y += tosa.o versatilepb.o vexpress.o xilinx_zynq.o z2.o

  obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
 -obj-y += omap1.o omap2.o strongarm.o
 +obj-y += omap1.o omap2.o strongarm.o allwinner-a10.o
 diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
 new file mode 100644
 index 000..c4699b7
 --- /dev/null
 +++ b/hw/arm/allwinner-a10.c
 @@ -0,0 +1,77 @@
 +#include hw/sysbus.h
 +#include hw/devices.h
 +#include hw/arm/allwinner-a10.h
 +
 +
 +static void aw_a10_init(Object *obj)
 +{
 +AwA10State *s = AW_A10(obj);
 +DeviceState *dev;
 +
 +object_initialize(s-cpu, sizeof(s-cpu), cortex-a8- TYPE_ARM_CPU);
 +object_property_add_child(obj, cpu, OBJECT(s-cpu), NULL);
 +
 +object_initialize(s-intc, sizeof(s-timer), TYPE_AW_A10_PIC);
 +dev = DEVICE(s-intc);

You could just use DEVICE(foo) inline here and below and drop the dev
variable to reduce verbosity.

 +qdev_set_parent_bus(dev, sysbus_get_default());
 +
 +object_initialize(s-timer, sizeof(s-timer), TYPE_AW_A10_PIT);
 +dev = DEVICE(s-timer);
 +qdev_set_parent_bus(dev, sysbus_get_default());
 +}
 +
 +static void aw_a10_realize(DeviceState *dev, Error **errp)
 +{
 +AwA10State *s = AW_A10(dev);
 +SysBusDevice *sysbusdev;
 +uint8_t i;
 +Error *err = NULL;
 +
 +object_property_set_bool(OBJECT(s-cpu), true, realized, err);
 +s-cpu_irq[0] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_IRQ);
 +s-cpu_irq[1] = qdev_get_gpio_in(DEVICE(s-cpu), ARM_CPU_FIQ);
 +
 +object_property_set_bool(OBJECT(s-intc), true, realized, err);

You neither assert nor propagate the error. So the repeated usage
below will cause a delayed (and somewhat obscure) assertion below if
both happen to error for some reason.

 +sysbusdev = SYS_BUS_DEVICE(s-intc);
 +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIC_REG_BASE);
 +sysbus_connect_irq(sysbusdev, 0, s-cpu_irq[0]);
 +sysbus_connect_irq(sysbusdev, 1, s-cpu_irq[1]);
 +for (i = 0; i  AW_A10_PIC_INT_NR; i++) {
 +s-irq[i] = qdev_get_gpio_in(DEVICE(s-intc), i);
 +}
 +
 +object_property_set_bool(OBJECT(s-timer), true, realized, err);
 +sysbusdev = SYS_BUS_DEVICE(s-timer);
 +sysbus_mmio_map(sysbusdev, 0, AW_A10_PIT_REG_BASE);
 +sysbus_connect_irq(sysbusdev, 0, s-irq[22]);
 +sysbus_connect_irq(sysbusdev, 1, s-irq[23]);
 +sysbus_connect_irq(sysbusdev, 2, s-irq[24]);
 +sysbus_connect_irq(sysbusdev, 3, s-irq[25]);
 +sysbus_connect_irq(sysbusdev, 4, s-irq[67]);
 +sysbus_connect_irq(sysbusdev, 5, s-irq[68]);
 +
 +serial_mm_init(get_system_memory(), AW_A10_UART0_REG_BASE, 2, s-irq[1],
 +   115200, serial_hds[0], DEVICE_NATIVE_ENDIAN);
 +}
 +
 +static void aw_a10_class_init(ObjectClass *oc, void *data)
 +{
 +DeviceClass *dc = DEVICE_CLASS(oc);
 +
 +dc-realize = aw_a10_realize;
 +}
 +
 +static const TypeInfo aw_a10_type_info = {
 +.name = TYPE_AW_A10,
 +.parent = TYPE_DEVICE,
 +.instance_size = sizeof(AwA10State),
 +.instance_init = aw_a10_init,
 +.class_init = aw_a10_class_init,
 +};
 +
 +static void aw_a10_register_types(void)
 +{
 +type_register_static(aw_a10_type_info);
 +}
 +
 +type_init(aw_a10_register_types)
 diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h
 new file mode 100644
 index 000..a3e7b77
 --- /dev/null
 +++ b/include/hw/arm/allwinner-a10.h
 @@ -0,0 +1,36 @@
 +#ifndef ALLWINNER_H_
 +
 +#include qemu-common.h
 +#include qemu/error-report.h
 +#include hw/char/serial.h
 +#include hw/arm/arm.h
 +#include hw/timer/allwinner-a10-pit.h
 +#include hw/intc/allwinner-a10-pic.h
 +
 +#include sysemu/sysemu.h
 +#include exec/address-spaces.h
 +
 +
 +#define AW_A10_PIC_REG_BASE 0x01c20400
 +#define AW_A10_PIT_REG_BASE 0x01c20c00
 +#define AW_A10_UART0_REG_BASE   0x01c28000
 +
 +#define AW_A10_SDRAM_BASE   0x4000
 +
 +#define TYPE_AW_A10 allwiner-a10

allwinner

 +#define AW_A10(obj) OBJECT_CHECK(AwA10State, (obj), TYPE_AW_A10)
 +
 +typedef struct AwA10State {
 +/* private */
 +DeviceState parent_obj;
 +/* public */
 +
 +ARMCPU cpu;
 +qemu_irq irq[AW_A10_PIC_INT_NR];
 +qemu_irq cpu_irq[2];

I dont see the need to keep these as device state. They appear to be
just local variables to realize().

 +

[Qemu-devel] [PATCH v8 1/6] target-arm: Move call to disas_vfp_insn out of disas_coproc_insn.

2013-12-04 Thread Will Newton

Floating point is an extension to the instruction set rather than
a coprocessor, so call it directly from the ARM and Thumb decode
functions.

Signed-off-by: Will Newton will.new...@linaro.org
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
 target-arm/translate.c | 32 +++-
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5f003e7..f63e89d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2636,6 +2636,14 @@ static int disas_vfp_insn(CPUARMState * env, 
DisasContext *s, uint32_t insn)
  rn != ARM_VFP_MVFR1  rn != ARM_VFP_MVFR0)
 return 1;
 }
+
+if (extract32(insn, 28, 4) == 0xf) {
+/* Encodings with T=1 (Thumb) or unconditional (ARM):
+ * only used in v8 and above.
+ */
+return 1;
+}
+
 dp = ((insn  0xf00) == 0xb00);
 switch ((insn  24)  0xf) {
 case 0xe:
@@ -6296,9 +6304,6 @@ static int disas_coproc_insn(CPUARMState * env, 
DisasContext *s, uint32_t insn)
return disas_dsp_insn(env, s, insn);
}
return 1;
-case 10:
-case 11:
-   return disas_vfp_insn (env, s, insn);
 default:
 break;
 }
@@ -6753,6 +6758,13 @@ static void disas_arm_insn(CPUARMState * env, 
DisasContext *s)
 goto illegal_op;
 return;
 }
+if ((insn  0x0f000e10) == 0x0e000a00) {
+/* VFP.  */
+if (disas_vfp_insn(env, s, insn)) {
+goto illegal_op;
+}
+return;
+}
 if (((insn  0x0f30f000) == 0x0510f000) ||
 ((insn  0x0f30f010) == 0x0710f000)) {
 if ((insn  (1  22)) == 0) {
@@ -8033,9 +8045,15 @@ static void disas_arm_insn(CPUARMState * env, 
DisasContext *s)
 case 0xc:
 case 0xd:
 case 0xe:
-/* Coprocessor.  */
-if (disas_coproc_insn(env, s, insn))
+if (((insn  8)  0xe) == 10) {
+/* VFP.  */
+if (disas_vfp_insn(env, s, insn)) {
+goto illegal_op;
+}
+} else if (disas_coproc_insn(env, s, insn)) {
+/* Coprocessor.  */
 goto illegal_op;
+}
 break;
 case 0xf:
 /* swi */
@@ -8765,6 +8783,10 @@ static int disas_thumb2_insn(CPUARMState *env, 
DisasContext *s, uint16_t insn_hw
 insn = (insn  0xe2ff) | ((insn  (1  28))  4) | (1  28);
 if (disas_neon_data_insn(env, s, insn))
 goto illegal_op;
+} else if (((insn  8)  0xe) == 10) {
+if (disas_vfp_insn(env, s, insn)) {
+goto illegal_op;
+}
 } else {
 if (insn  (1  28))
 goto illegal_op;
-- 
1.8.1.4

[Qemu-devel] [PATCH v8 2/6] target-arm: Implement ARMv8 VSEL instruction.

2013-12-04 Thread Will Newton

This adds support for the VSEL floating point selection instruction
which was added in ARMv8.

Signed-off-by: Will Newton will.new...@linaro.org
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
 target-arm/translate.c | 135 -
 1 file changed, 134 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index f63e89d..0a22ad8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2614,6 +2614,139 @@ static TCGv_i32 gen_load_and_replicate(DisasContext *s, 
TCGv_i32 addr, int size)
 return tmp;
 }
 
+static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
+   uint32_t dp)
+{
+uint32_t cc = extract32(insn, 20, 2);
+
+if (dp) {
+TCGv_i64 frn, frm, dest;
+TCGv_i64 tmp, zero, zf, nf, vf;
+
+zero = tcg_const_i64(0);
+
+frn = tcg_temp_new_i64();
+frm = tcg_temp_new_i64();
+dest = tcg_temp_new_i64();
+
+zf = tcg_temp_new_i64();
+nf = tcg_temp_new_i64();
+vf = tcg_temp_new_i64();
+
+tcg_gen_extu_i32_i64(zf, cpu_ZF);
+tcg_gen_ext_i32_i64(nf, cpu_NF);
+tcg_gen_ext_i32_i64(vf, cpu_VF);
+
+tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
+tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
+switch (cc) {
+case 0: /* eq: Z */
+tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
+frn, frm);
+break;
+case 1: /* vs: V */
+tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
+frn, frm);
+break;
+case 2: /* ge: N == V - N ^ V == 0 */
+tmp = tcg_temp_new_i64();
+tcg_gen_xor_i64(tmp, vf, nf);
+tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
+frn, frm);
+tcg_temp_free_i64(tmp);
+break;
+case 3: /* gt: !Z  N == V */
+tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
+frn, frm);
+tmp = tcg_temp_new_i64();
+tcg_gen_xor_i64(tmp, vf, nf);
+tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
+dest, frm);
+tcg_temp_free_i64(tmp);
+break;
+}
+tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
+tcg_temp_free_i64(frn);
+tcg_temp_free_i64(frm);
+tcg_temp_free_i64(dest);
+
+tcg_temp_free_i64(zf);
+tcg_temp_free_i64(nf);
+tcg_temp_free_i64(vf);
+
+tcg_temp_free_i64(zero);
+} else {
+TCGv_i32 frn, frm, dest;
+TCGv_i32 tmp, zero;
+
+zero = tcg_const_i32(0);
+
+frn = tcg_temp_new_i32();
+frm = tcg_temp_new_i32();
+dest = tcg_temp_new_i32();
+tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
+tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
+switch (cc) {
+case 0: /* eq: Z */
+tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
+frn, frm);
+break;
+case 1: /* vs: V */
+tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
+frn, frm);
+break;
+case 2: /* ge: N == V - N ^ V == 0 */
+tmp = tcg_temp_new_i32();
+tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
+frn, frm);
+tcg_temp_free_i32(tmp);
+break;
+case 3: /* gt: !Z  N == V */
+tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
+frn, frm);
+tmp = tcg_temp_new_i32();
+tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
+dest, frm);
+tcg_temp_free_i32(tmp);
+break;
+}
+tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
+tcg_temp_free_i32(frn);
+tcg_temp_free_i32(frm);
+tcg_temp_free_i32(dest);
+
+tcg_temp_free_i32(zero);
+}
+
+return 0;
+}
+
+static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
+{
+uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
+
+if (!arm_feature(env, ARM_FEATURE_V8)) {
+return 1;
+}
+
+if (dp) {
+VFP_DREG_D(rd, insn);
+VFP_DREG_N(rn, insn);
+VFP_DREG_M(rm, insn);
+} else {
+rd = VFP_SREG_D(insn);
+rn = VFP_SREG_N(insn);
+rm = VFP_SREG_M(insn);
+}
+
+if ((insn  0x0f800e50) == 0x0e000a00) {
+return handle_vsel(insn, rd, rn, rm, dp);
+}
+return 1;
+}
+
 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
(ie. an undefined instruction).

[Qemu-devel] [PATCH v8 6/6] target-arm: Implement ARMv8 SIMD VMAXNM and VMINNM instructions.

2013-12-04 Thread Will Newton

This adds support for the ARMv8 Advanced SIMD VMAXNM and VMINNM
instructions.

Signed-off-by: Will Newton will.new...@linaro.org
---
 target-arm/translate.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

Changes in v8:
 - Use VFP helper instead of adding a NEON specific one
 - Drop size check

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9a8069e..73ed266 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4553,7 +4553,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
+#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
 
 static const uint8_t neon_3r_sizes[] = {
 [NEON_3R_VHADD] = 0x7,
@@ -4586,7 +4586,7 @@ static const uint8_t neon_3r_sizes[] = {
 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
-[NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
+[NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
 };
 
 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
@@ -4847,8 +4847,9 @@ static int disas_neon_data_insn(CPUARMState * env, 
DisasContext *s, uint32_t ins
 return 1;
 }
 break;
-case NEON_3R_VRECPS_VRSQRTS:
-if (u) {
+case NEON_3R_FLOAT_MISC:
+/* VMAXNM/VMINNM in ARMv8 */
+if (u  !arm_feature(env, ARM_FEATURE_V8)) {
 return 1;
 }
 break;
@@ -5137,11 +5138,23 @@ static int disas_neon_data_insn(CPUARMState * env, 
DisasContext *s, uint32_t ins
 tcg_temp_free_ptr(fpstatus);
 break;
 }
-case NEON_3R_VRECPS_VRSQRTS:
-if (size == 0)
-gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
-else
-gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+case NEON_3R_FLOAT_MISC:
+if (u) {
+/* VMAXNM/VMINNM */
+TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+if (size == 0) {
+gen_helper_vfp_maxnms(tmp, tmp, tmp2, fpstatus);
+} else {
+gen_helper_vfp_minnms(tmp, tmp, tmp2, fpstatus);
+}
+tcg_temp_free_ptr(fpstatus);
+} else {
+if (size == 0) {
+gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
+} else {
+gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+  }
+}
 break;
 case NEON_3R_VFM:
 {
-- 
1.8.1.4

[Qemu-devel] [PATCH v8 0/6] target-arm: Add support for VSEL and VMIN/MAXNM.

2013-12-04 Thread Will Newton

This series adds support for three new instructions added in ARMv8 -
VSEL, VMINNM and VMAXNM.

Will Newton (6):
  target-arm: Move call to disas_vfp_insn out of disas_coproc_insn.
  target-arm: Implement ARMv8 VSEL instruction.
  softfloat: Remove unused argument from MINMAX macro.
  softfloat: Add minNum() and maxNum() functions to softfloat.
  target-arm: Implement ARMv8 FP VMAXNM and VMINNM instructions.
  target-arm: Implement ARMv8 SIMD VMAXNM and VMINNM instructions.

 fpu/softfloat.c |  38 ++--
 include/fpu/softfloat.h |   4 +
 target-arm/helper.c |  25 +
 target-arm/helper.h |   5 +
 target-arm/translate.c  | 246 +---
 5 files changed, 298 insertions(+), 20 deletions(-)

-- 
1.8.1.4

[Qemu-devel] [PATCH v8 5/6] target-arm: Implement ARMv8 FP VMAXNM and VMINNM instructions.

2013-12-04 Thread Will Newton

This adds support for the ARMv8 floating point VMAXNM and VMINNM
instructions.

Signed-off-by: Will Newton will.new...@linaro.org
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
 target-arm/helper.c| 25 +
 target-arm/helper.h|  5 +
 target-arm/translate.c | 50 ++
 3 files changed, 80 insertions(+)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3445813..7507240 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -4079,3 +4079,28 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, 
float64 c, void *fpstp)
 float_status *fpst = fpstp;
 return float64_muladd(a, b, c, 0, fpst);
 }
+
+/* ARMv8 VMAXNM/VMINNM */
+float32 VFP_HELPER(maxnm, s)(float32 a, float32 b, void *fpstp)
+{
+float_status *fpst = fpstp;
+return float32_maxnum(a, b, fpst);
+}
+
+float64 VFP_HELPER(maxnm, d)(float64 a, float64 b, void *fpstp)
+{
+float_status *fpst = fpstp;
+return float64_maxnum(a, b, fpst);
+}
+
+float32 VFP_HELPER(minnm, s)(float32 a, float32 b, void *fpstp)
+{
+float_status *fpst = fpstp;
+return float32_minnum(a, b, fpst);
+}
+
+float64 VFP_HELPER(minnm, d)(float64 a, float64 b, void *fpstp)
+{
+float_status *fpst = fpstp;
+return float64_minnum(a, b, fpst);
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index cac9564..d459a39 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -132,6 +132,11 @@ DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
 DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
 
+DEF_HELPER_3(vfp_maxnmd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnms, f32, f32, f32, ptr)
+DEF_HELPER_3(vfp_minnmd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnms, f32, f32, f32, ptr)
+
 DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
 DEF_HELPER_2(recpe_f32, f32, f32, env)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 0a22ad8..9a8069e 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2723,6 +2723,54 @@ static int handle_vsel(uint32_t insn, uint32_t rd, 
uint32_t rn, uint32_t rm,
 return 0;
 }
 
+static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
+uint32_t rm, uint32_t dp)
+{
+uint32_t vmin = extract32(insn, 6, 1);
+TCGv_ptr fpst = get_fpstatus_ptr(0);
+
+if (dp) {
+TCGv_i64 frn, frm, dest;
+
+frn = tcg_temp_new_i64();
+frm = tcg_temp_new_i64();
+dest = tcg_temp_new_i64();
+
+tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
+tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
+if (vmin) {
+gen_helper_vfp_minnmd(dest, frn, frm, fpst);
+} else {
+gen_helper_vfp_maxnmd(dest, frn, frm, fpst);
+}
+tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
+tcg_temp_free_i64(frn);
+tcg_temp_free_i64(frm);
+tcg_temp_free_i64(dest);
+} else {
+TCGv_i32 frn, frm, dest;
+
+frn = tcg_temp_new_i32();
+frm = tcg_temp_new_i32();
+dest = tcg_temp_new_i32();
+
+tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
+tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
+if (vmin) {
+gen_helper_vfp_minnms(dest, frn, frm, fpst);
+} else {
+gen_helper_vfp_maxnms(dest, frn, frm, fpst);
+}
+tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
+tcg_temp_free_i32(frn);
+tcg_temp_free_i32(frm);
+tcg_temp_free_i32(dest);
+}
+
+tcg_temp_free_ptr(fpst);
+return 0;
+}
+
 static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
 {
 uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
@@ -2743,6 +2791,8 @@ static int disas_vfp_v8_insn(CPUARMState *env, 
DisasContext *s, uint32_t insn)
 
 if ((insn  0x0f800e50) == 0x0e000a00) {
 return handle_vsel(insn, rd, rn, rm, dp);
+} else if ((insn  0x0fb00e10) == 0x0e800a00) {
+return handle_vminmaxnm(insn, rd, rn, rm, dp);
 }
 return 1;
 }
-- 
1.8.1.4

[Qemu-devel] [PATCH v8 3/6] softfloat: Remove unused argument from MINMAX macro.

2013-12-04 Thread Will Newton

The nan_exp argument is not used, so remove it.

Signed-off-by: Will Newton will.new...@linaro.org
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
 fpu/softfloat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 7ba51b6..97bf627 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -6706,7 +6706,7 @@ int float128_compare_quiet( float128 a, float128 b 
STATUS_PARAM )
  * 'compare and pick one input' because that would mishandle
  * NaNs and +0 vs -0.
  */
-#define MINMAX(s, nan_exp)  \
+#define MINMAX(s)   \
 INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
 int ismin STATUS_PARAM )\
 {   \
@@ -6747,8 +6747,8 @@ float ## s float ## s ## _max(float ## s a, float ## s b 
STATUS_PARAM)  \
 return float ## s ## _minmax(a, b, 0 STATUS_VAR);   \
 }
 
-MINMAX(32, 0xff)
-MINMAX(64, 0x7ff)
+MINMAX(32)
+MINMAX(64)
 
 
 /* Multiply A by 2 raised to the power N.  */
-- 
1.8.1.4

[Qemu-devel] [PATCH v8 4/6] softfloat: Add minNum() and maxNum() functions to softfloat.

2013-12-04 Thread Will Newton

Add floatnn_minnum() and floatnn_maxnum() functions which are equivalent
to the minNum() and maxNum() functions from IEEE 754-2008. They are
similar to min() and max() but differ in the handling of QNaN arguments.

Signed-off-by: Will Newton will.new...@linaro.org
---
 fpu/softfloat.c | 32 +---
 include/fpu/softfloat.h |  4 
 2 files changed, 33 insertions(+), 3 deletions(-)

Changes in v8:
 - Use existing MINMAX macro to implement minnum and maxnum
 - Improve comment

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 97bf627..dbda61b 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -6705,10 +6705,17 @@ int float128_compare_quiet( float128 a, float128 b 
STATUS_PARAM )
 /* min() and max() functions. These can't be implemented as
  * 'compare and pick one input' because that would mishandle
  * NaNs and +0 vs -0.
+ *
+ * minnum() and maxnum() functions. These are similar to the min()
+ * and max() functions but if one of the arguments is a QNaN and
+ * the other is numerical then the numerical argument is returned.
+ * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
+ * and maxNum() operations. min() and max() are the typical min/max
+ * semantics provided by many CPUs which predate that specification.
  */
 #define MINMAX(s)   \
 INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
-int ismin STATUS_PARAM )\
+int ismin, int isieee STATUS_PARAM) \
 {   \
 flag aSign, bSign;  \
 uint ## s ## _t av, bv; \
@@ -6716,6 +6723,15 @@ INLINE float ## s float ## s ## _minmax(float ## s a, 
float ## s b, \
 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
 if (float ## s ## _is_any_nan(a) || \
 float ## s ## _is_any_nan(b)) { \
+if (isieee) {   \
+if (float ## s ## _is_quiet_nan(a)\
+!float ## s ##_is_any_nan(b)) { \
+return b;   \
+} else if (float ## s ## _is_quiet_nan(b) \
+   !float ## s ## _is_any_nan(a)) { \
+return a;   \
+}   \
+}   \
 return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
 }   \
 aSign = extractFloat ## s ## Sign(a);   \
@@ -6739,12 +6755,22 @@ INLINE float ## s float ## s ## _minmax(float ## s a, 
float ## s b, \
 \
 float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
 {   \
-return float ## s ## _minmax(a, b, 1 STATUS_VAR);   \
+return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR);\
 }   \
 \
 float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
 {   \
-return float ## s ## _minmax(a, b, 0 STATUS_VAR);   \
+return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR);\
+}   \
+\
+float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
+{   \
+return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR);\
+}   \
+\
+float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
+{   \
+return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR);\
 }
 
 MINMAX(32)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index f3927e2..2365274 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -302,6 +302,8 @@ int float32_compare( float32, float32 STATUS_PARAM );
 int float32_compare_quiet( float32,

Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP

2013-12-04 Thread Alexey Kardashevskiy

On 12/04/2013 08:33 PM, Markus Armbruster wrote:
 Paolo Bonzini pbonz...@redhat.com writes:
 
 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 Normally the user is expected to eject DVD if it is not locked by
 the guest. eject_device() makes few checks and calls bdrv_close()
 if DVD is not in use.

 However it is still possible to eject DVD even if it is in use.
 For that, QEMU sets eject requested flag, the guest reads it, issues
 ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case,
 bdrv_close() is not called anywhere so it remains inserted in QEMU's
 terms.

 This is expected behavior, and matches what IDE does.

 Markus, can you confirm?
 
 Confirmed.  See commit 4be9762.
 
 Alexey, monitor commands eject does two things: it first opens the tray,
 and if that works, it removes the medium.
 
 If the tray is locked closed, it tells the device model that eject was
 requested.  Works just like the physical eject button.
 
 With -f, it then rips out the medium.  This is similar to opening the
 tray with a unbent paperclip.  Let's ignore this case.
 
 The scsi-cd device model tells the guest about the eject request.  A
 well-behaved guest will then command the device to unlock and open the
 tray.
 
 The guest uses the same commands on behalf of its applications,
 e.g. /usr/bin/eject.
 
 Your patch changes behavior of eject /dev/sr0  eject -t /dev/sr0:
 you no longer get the same medium back.  You normally do with real
 hardware.
 
 The somewhat unfortunate consequence is that monitor command eject can
 only remove the medium when the tray is not locked.


Oh. Wow. Nice :-/

Ok. So. It is expected that the real system will close the tray back if it
was mounted, is not it?

Right now, after eject info block is like this:

cd1: virtimg/Fedora-19-ppc64-netinst.iso (raw)
Removable device: locked, tray open

And the mountpoint does not work in the guest. The state above even
persists after umount in the guest. It only becomes correct again
(tray==closed) when I mount DVD again.

Is it all expected to work like this? Thanks.


-- 
Alexey

Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL

2013-12-04 Thread Alexey Kardashevskiy

On 12/04/2013 08:35 PM, Markus Armbruster wrote:
 Paolo Bonzini pbonz...@redhat.com writes:
 
 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 This prevents the guest from preventing DVD medium removal when
 there is no medium.

 Without this, if the user has ejected a DVD, it is possible to
 have a block device with an open tray, no media but locked.

 I need to check this against a real disk.  Thanks!
 
 If I remember correctly, I did, and it works.  You can still close the
 open tray in this state.


I thought it is about NOT letting close the open tray. So the hardware can
be programmed to ignore the hardware eject button when the tray is open?


-- 
Alexey

Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode

2013-12-04 Thread Fedorov Sergey



On 12/04/2013 03:18 PM, Peter Maydell wrote:

On 4 December 2013 10:58, Peter Crosthwaite
peter.crosthwa...@xilinx.com wrote:

So what im proposing is just a slightly more general patch. Is it
really any more complicated than just applying your change pattern for
the hyp mode?

I think it would be, because of the wrinkle that hyp mode doesn't
have a banked LR, so the existing assume we can just translate
the mode into a single index good for both LR and SP logic
would break.

The minimal change if we wanted to keep VMSD bumps to a
minimum would be to increase the size of the banked_spsr[]
and banked_r13[] arrays to allow for Hyp mode but do nothing
else (except add a comment about it I guess).


If we want to bump VMSD just once for monitor + hypervisor mode then we 
need to add ELR_hyp register definition too. I think then it would be 
better to implement hypervisor mode and its special banking scheme, too. 
But I doubt it worth to combine these two things into one patch.





The motiviation is less VMSD version bumps in ARM CPU (a place where I
expect assume such version bumps to be considerable annoyance).

Well, they're only a problem at the point where we start trying
to support cross-version migration; we're not at that point yet...

thanks
-- PMM



Best regards,
Sergey Fedorov

Re: [Qemu-devel] [RFC PATCH 05/21] target-arm: add CPU Monitor mode

2013-12-04 Thread Peter Maydell

On 4 December 2013 12:33, Fedorov Sergey s.fedo...@samsung.com wrote:

 On 12/04/2013 03:18 PM, Peter Maydell wrote:

 On 4 December 2013 10:58, Peter Crosthwaite
 peter.crosthwa...@xilinx.com wrote:

 So what im proposing is just a slightly more general patch. Is it
 really any more complicated than just applying your change pattern for
 the hyp mode?

 I think it would be, because of the wrinkle that hyp mode doesn't
 have a banked LR, so the existing assume we can just translate
 the mode into a single index good for both LR and SP logic
 would break.

 The minimal change if we wanted to keep VMSD bumps to a
 minimum would be to increase the size of the banked_spsr[]
 and banked_r13[] arrays to allow for Hyp mode but do nothing
 else (except add a comment about it I guess).


 If we want to bump VMSD just once for monitor + hypervisor mode then we need
 to add ELR_hyp register definition too. I think then it would be better to
 implement hypervisor mode and its special banking scheme, too. But I doubt
 it worth to combine these two things into one patch.

It's possible to add single new fields to the VMState without
requiring a compatibility break, by marking the new field as
only present in version X or greater; new elements on the
end of arrays are a little fiddlier.

But yes, I think we should just not worry about possible future
Hyp mode now. Let's stick with your current patch.

thanks
-- PMM

[Qemu-devel] [PATCH] qemu-iotests: 074 fixups

2013-12-04 Thread Stefan Hajnoczi

A few things missing from the previous patch:
 * add 074 to the group file
 * label new output 074 instead of 048
 * remove qemu-io  prompts from output (qemu.git/master merge
   conflict)

Signed-off-by: Stefan Hajnoczi stefa...@redhat.com
---
Fam: can I squash these missing things into your patch?

 tests/qemu-iotests/074 |  0
 tests/qemu-iotests/074.out | 10 +-
 tests/qemu-iotests/group   |  1 +
 3 files changed, 6 insertions(+), 5 deletions(-)
 mode change 100644 = 100755 tests/qemu-iotests/074

diff --git a/tests/qemu-iotests/074 b/tests/qemu-iotests/074
old mode 100644
new mode 100755
diff --git a/tests/qemu-iotests/074.out b/tests/qemu-iotests/074.out
index 3deb594..8fba5ae 100644
--- a/tests/qemu-iotests/074.out
+++ b/tests/qemu-iotests/074.out
@@ -1,17 +1,17 @@
-QA output created by 048
+QA output created by 074
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 === IO: pattern 102
-qemu-io wrote 512/512 bytes at offset 512
+wrote 512/512 bytes at offset 512
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-io qemu-img: Error while reading offset 0 of 
blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+qemu-img: Error while reading offset 0 of 
blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
 qemu-img: Error while reading offset 0: Input/output error
 4
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=0
 === IO: pattern 102
-qemu-io wrote 512/512 bytes at offset 512
+wrote 512/512 bytes at offset 512
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-io qemu-img: Error while reading offset 0 of 
blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
+qemu-img: Error while reading offset 0 of 
blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
 qemu-img: Error while reading offset 0 of 
blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT: Input/output error
 Warning: Image size mismatch!
 4
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 303e0f3..ed10720 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -77,3 +77,4 @@
 069 rw auto
 070 rw auto
 073 rw auto
+074 rw auto
-- 
1.8.4.2

Re: [Qemu-devel] [RFC PATCH 00/21] target-arm: add CPU core TrustZone support

2013-12-04 Thread Fedorov Sergey



On 12/04/2013 03:13 PM, Peter Maydell wrote:

On 4 December 2013 10:08, Fedorov Sergey s.fedo...@samsung.com wrote:

On 12/03/2013 12:48 PM, Sergey Fedorov wrote:

This patch set implements a basic support of CPU core TrustZone feature.

We'd like this patch series finally to be merged into mainstream. What
should be done to achieve this goal?

I'd like to see TZ support in mainline too.


That is the most important for me now :-) So I will try these patches to 
get shape suitable for mainline. Thanks!



The high level answer
is that it needs to get code reviewed, and you need to fix issues
that are raised in code review. Unfortunately my review queue is
currently pretty full (it has Allwinner board support, DIGIC board
support, a bunch of Cadence fixes, ARMv8 32 bit new instructions
and the A64 64 bit instruction support in it, all of which are fairly
big patchsets), so it may take me a little while to get to this
patchset. It is on my todo list though, so it won't get forgotten :-)

thanks
-- PMM



Best regards,
Sergey Fedorov

Re: [Qemu-devel] [Qemu-trivial] [PATCH] vl: remove (max_cpus 255) check from smp_parse

2013-12-04 Thread Eduardo Habkost

On Wed, Dec 04, 2013 at 04:50:59PM +1100, Alexey Kardashevskiy wrote:
 On 12/04/2013 01:47 AM, Eduardo Habkost wrote:
  On Tue, Dec 03, 2013 at 02:30:48PM +0100, Andreas Färber wrote:
  Am 03.12.2013 00:03, schrieb Alexey Kardashevskiy:
  On 12/03/2013 09:09 AM, Andreas Färber wrote:
  Am 02.12.2013 18:06, schrieb Michael Tokarev:
  25.11.2013 07:39, Alexey Kardashevskiy wrote:
  Since modern POWER7/POWER8 chips can have more that 256 CPU threads
  (2000 actually), remove this check from smp_parse.
 
  The CPUs number is still checked against machine-max_cpus and this 
  check
  should be enough not to break other archs.
 
  should be is not exactly the highest level of confidence for a
  trivial patch... :/
  [...]
  Alexey, did you actually check that, e.g., x86 machines don't break with
  256 or 257 CPUs now?
 
  PC_DEFAULT_MACHINE_OPTIONS sets it to 255. And I cannot find any machine
  which would not define max_cpus, have I missed any?
 
  If you've actually *checked* the other machines' code then fine with me,
  just say so in the commit message. :)
  
  I just grepped for max_cpus and checked every match. The largest
  values I found were:
  
  hw/ppc/spapr.c: 256
  s390: 255
  pc: 255
  
  All the rest had values = 32.
  
  Machines with missing max_cpus value shouldn't be a problem, as
  max_cpus==0 is interpreted as 1 by the vl.c code.
  
  But we still need to add a check for max_cpus  machine-max_cpus to
  vl.c, before we eliminate the smp_parse() check.
 
 
 Since smp_parse() checks if (max_cpus = smp_cpus), this should just work:
 
 diff --git a/vl.c b/vl.c
 index e6ed260..544165a 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -3882,9 +3882,9 @@ int main(int argc, char **argv, char **envp)
  smp_parse(qemu_opts_find(qemu_find_opts(smp-opts), NULL));
 
  machine-max_cpus = machine-max_cpus ?: 1; /* Default to UP */
 -if (smp_cpus  machine-max_cpus) {
 +if (max_cpus  machine-max_cpus) {
  fprintf(stderr, Number of SMP cpus requested (%d), exceeds max cpus 
 
 -supported by machine `%s' (%d)\n, smp_cpus,  machine-name,
 +supported by machine `%s' (%d)\n, max_cpus,  machine-name,
  machine-max_cpus);
  exit(1);
  }
 
 
  There's also this, at main():
  
  if (i == nb_numa_nodes) {
  for (i = 0; i  max_cpus; i++) {
  set_bit(i, node_cpumask[i % nb_numa_nodes]);
  }
  }
  
  node_cpumask[] is initialized using bitmap_new(MAX_CPUMASK_BITS), and
  MAX_CPUMASK_BITS is 255. To fix this, we can initialize node_cpumask[] using
  max_cpus instead, if we initialize it after smp_parse().
 
 
 Nope. At the moment when we parse -numa in vl.c, we may not know yet what
 machine is going to be used and machines can have different max_cpus.

This will be changed by:

  Subject: [PATCH V17 04/11] NUMA: convert -numa option to use OptsVisitor
  Message-Id: 1386143939-19142-5-git-send-email-gaowanl...@cn.fujitsu.com
  http://article.gmane.org/gmane.comp.emulators.qemu/244826

 
 For now, I would simply change MAX_CPUMASK_BITS to something crazy, like
 16384 (2KB per numa node), I hope QEMU can survive such a memory waste :)
 
 Ok?

I'm OK with that as long the code has proper checks in case max_cpus
gets set to a crazily large value (larger than MAX_CPUMASK_BITS) in the
far future, or if we prevent max_cpus from being larger than
MAX_CPUMASK_BITS.

-- 
Eduardo

Re: [Qemu-devel] [PATCH 2/3] scsi-disk: check for meduim on ALLOW_MEDIUM_REMOVAL

2013-12-04 Thread Markus Armbruster

Alexey Kardashevskiy a...@ozlabs.ru writes:

 On 12/04/2013 08:35 PM, Markus Armbruster wrote:
 Paolo Bonzini pbonz...@redhat.com writes:
 
 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 This prevents the guest from preventing DVD medium removal when
 there is no medium.

 Without this, if the user has ejected a DVD, it is possible to
 have a block device with an open tray, no media but locked.

 I need to check this against a real disk.  Thanks!
 
 If I remember correctly, I did, and it works.  You can still close the
 open tray in this state.


 I thought it is about NOT letting close the open tray. So the hardware can
 be programmed to ignore the hardware eject button when the tray is open?

From memory: you can lock the medium whether the tray is open or not.
The lock applies to the button operating the tray.  It prevents medium
eject.  It never prevents closing an empty tray.  I don't remember
whether opening an empty tray is still permitted.

Re: [Qemu-devel] [PATCH] qemu-iotests: 074 fixups

2013-12-04 Thread Fam Zheng


On 2013年12月04日 20:45, Stefan Hajnoczi wrote:

A few things missing from the previous patch:
  * add 074 to the group file
  * label new output 074 instead of 048
  * remove qemu-io  prompts from output (qemu.git/master merge
conflict)

Signed-off-by: Stefan Hajnoczi stefa...@redhat.com
---
Fam: can I squash these missing things into your patch?



Of course. Thanks.

Fam

Re: [Qemu-devel] [PATCH 1/3] scsi-disk: close drive on START_STOP

2013-12-04 Thread Markus Armbruster

Alexey Kardashevskiy a...@ozlabs.ru writes:

 On 12/04/2013 08:33 PM, Markus Armbruster wrote:
 Paolo Bonzini pbonz...@redhat.com writes:
 
 Il 04/12/2013 05:55, Alexey Kardashevskiy ha scritto:
 Normally the user is expected to eject DVD if it is not locked by
 the guest. eject_device() makes few checks and calls bdrv_close()
 if DVD is not in use.

 However it is still possible to eject DVD even if it is in use.
 For that, QEMU sets eject requested flag, the guest reads it, issues
 ALLOW_MEDIUM_REMOVAL(enable=1) and START_STOP(start=0). But in this case,
 bdrv_close() is not called anywhere so it remains inserted in QEMU's
 terms.

 This is expected behavior, and matches what IDE does.

 Markus, can you confirm?
 
 Confirmed.  See commit 4be9762.
 
 Alexey, monitor commands eject does two things: it first opens the tray,
 and if that works, it removes the medium.
 
 If the tray is locked closed, it tells the device model that eject was
 requested.  Works just like the physical eject button.
 
 With -f, it then rips out the medium.  This is similar to opening the
 tray with a unbent paperclip.  Let's ignore this case.
 
 The scsi-cd device model tells the guest about the eject request.  A
 well-behaved guest will then command the device to unlock and open the
 tray.
 
 The guest uses the same commands on behalf of its applications,
 e.g. /usr/bin/eject.
 
 Your patch changes behavior of eject /dev/sr0  eject -t /dev/sr0:
 you no longer get the same medium back.  You normally do with real
 hardware.
 
 The somewhat unfortunate consequence is that monitor command eject can
 only remove the medium when the tray is not locked.


 Oh. Wow. Nice :-/

 Ok. So. It is expected that the real system will close the tray back if it
 was mounted, is not it?

 Right now, after eject info block is like this:

 cd1: virtimg/Fedora-19-ppc64-netinst.iso (raw)
 Removable device: locked, tray open

 And the mountpoint does not work in the guest. The state above even
 persists after umount in the guest. It only becomes correct again
 (tray==closed) when I mount DVD again.

 Is it all expected to work like this? Thanks.

Can't reproduce, but can reproduce something similar.  Freshly booted
guest running RHEL-7 alpha, with the CD mounted:

(qemu) info block cd

cd: r7.iso (raw, read-only)
Removable device: locked, tray closed

Looks good.  Try to eject:

(qemu) eject cd
Device 'cd' is locked

Looks good.  This should have signalled the guest user wants to eject.
The guest should either ignore it, or unmount, unlock and eject.
Apparently, it does that:

(qemu) info block cd

cd: r7.iso (raw, read-only)
Removable device: locked, tray closed
(qemu) eject cd
Device 'cd' is locked
(qemu) info block cd

cd: r7.iso (raw, read-only)
Removable device: locked, tray closed
(qemu) info block cd

cd: r7.iso (raw, read-only)
Removable device: not locked, tray open

Except it forgets to unmount!  dmesg has VFS: busy inodes on changed
media or resized disk sr0.

Need somebody to find out how exactly this fails, and whether it's a
guest bug or a QEMU bug.

Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default

2013-12-04 Thread Eduardo Otubo




The existing approach clearly doesn't support the full range of options
that users specify on the command-line.


Bugs.  It will get fixed in time with more testing/debugging.  Eduardo is
working on improving the testing and RH's QA folks are working hard to shake
out the bugs too.  I just posted another bug fix patch to the whitelist a few
days ago.


Exactly, I'm working close with virt-test team to improve the testing 
and feedback for possible illegal syscalls on various scenarios.





So I guess the options are:

1. Don't make it the default since it breaks stuff but use it for very
specific scenarios (e.g. libvirt use cases that have been well tested).


In my opinion, I think it was probably a bit premature to make enable it by
default, but at some point in the future I think we do need to do this.


I have to admit it was a little premature, yes. But I think once we have 
a stable set of tool in virt-test, we can turn it on by default in a 
near future.





2. Provide a kind of syscall set for various QEMU options and apply the
union of them at launch.  This still seems fragile but in theory it
could work.


This is what I was discussing above.  I think this is likely the next big
improvement.



That's the feature I'm currently working on right now. We'll see some 
improvements in the future. :)



--
Eduardo Otubo
IBM Linux Technology Center

Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default

2013-12-04 Thread Eduardo Otubo




On 12/04/2013 07:39 AM, Stefan Hajnoczi wrote:

On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote:

Developers will only be happy with seccomp if it's easy and rewarding to
support/debug.


Agreed.

As a developer, how do you feel about the audit/syslog based approach I
mentioned earlier?


I used the commands you posted (I think that's what you mean).  They
produce useful output.

The problem is that without an error message on stderr or from the
shell, no one will think QEMU process dead and hung == check seccomp
immediately.  It's frustrating to deal with a silent failure.


The process dies with a SIGKILL, and sig handling in Qemu is hard to 
implement due to dozen of external linked libraries that has their own 
signal masks and conflicts with seccomp. I've already tried this 
approach in the past (you can find in the list by searching for debug mode)


The optimal goal here is to use virt-test and audit log to eliminate 
these sorts of things.


--
Eduardo Otubo
IBM Linux Technology Center

Re: [Qemu-devel] [PATCH v6 0/6] Add cache mode option to qemu-iotests, and change default mode to writeback

2013-12-04 Thread Stefan Hajnoczi

On Wed, Dec 04, 2013 at 09:06:57AM +0800, Fam Zheng wrote:
 This series adds cache mode option in the iotests framework. Test cases are
 updated to make use of cache mode and mask supported modes.
 
 v6: [05] Recover disappeared two lines. (Benoît)
 Added Wenchao's reviewed-by lines to other patches.
 
 v5: Fix help test for -c mode. (Wenchao)
 
 v4: Address Stefan's comments:
 Add _default_cache_mode.
 Split last two cases in 048 to 074.
 Use long option --cache instead of -t for qemu-io.
 
 v3: Change _unsupported_qemu_io_options to _supported_cache_modes.
 Change default mode to writeback.
 Clean up some whitespaces in the end of series.
 Fix 026.out.nocache case.
 Fix 048 case on tmpfs.
 
 
 Fam Zheng (6):
   qemu-iotests: Add -c cache-mode option
   qemu-iotests: Honour cache mode in iotests.py
   qemu-iotests: Add _default_cache_mode and _supported_cache_modes
   qemu-iotests: Change default cache mode to writeback
   qemu-iotests: Clean up spaces in usage output
   qemu-iotests: Split qcow2 only cases in 048
 
  tests/qemu-iotests/026|  3 +-
  tests/qemu-iotests/039|  3 +-
  tests/qemu-iotests/048| 27 --
  tests/qemu-iotests/048.out| 16 
  tests/qemu-iotests/052|  4 +-
  tests/qemu-iotests/074| 86 
 +++
  tests/qemu-iotests/074.out| 18 +
  tests/qemu-iotests/check  |  2 +-
  tests/qemu-iotests/common | 37 ++-
  tests/qemu-iotests/common.rc  | 25 -
  tests/qemu-iotests/iotests.py |  3 +-
  11 files changed, 155 insertions(+), 69 deletions(-)
  create mode 100644 tests/qemu-iotests/074
  create mode 100644 tests/qemu-iotests/074.out

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan

Re: [Qemu-devel] [PATCH 3/5] Add vhostsock option

2013-12-04 Thread Stefan Hajnoczi

On Fri, Nov 29, 2013 at 08:52:24PM +0100, Antonios Motakis wrote:
 @@ -91,15 +91,27 @@ static int vhost_net_get_fd(NetClientState *backend)
  }
  }
  
 -struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
 - bool force)
 +struct vhost_net *vhost_net_init(NetClientState *backend, char *vhostsock,
 + int devfd, bool force)
  {
  int r;
  struct vhost_net *net = g_malloc(sizeof *net);
 +const char *backend_sock = 0;
 +VhostBackendType backend_type = VHOST_BACKEND_TYPE_NONE;
 +
  if (!backend) {
  fprintf(stderr, vhost-net requires backend to be setup\n);
  goto fail;
  }
 +
 +if (vhostsock  strcmp(vhostsock, VHOST_NET_DEFAULT_SOCK) != 0) {

This is a weird hack.  Why check for VHOST_NET_DEFAULT_SOCK at all?

If the option is not present then kernel vhost is used, if the option is
present then userspace vhost is used.  I don't understand why a magic
hardcoded path is useful.

Re: [Qemu-devel] [PATCH 2/5] Add vhost-kernel and the vhost-user skeleton

2013-12-04 Thread Stefan Hajnoczi

On Fri, Nov 29, 2013 at 08:52:23PM +0100, Antonios Motakis wrote:
 diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
 index 05de174..80defe4 100644
 --- a/hw/virtio/vhost-backend.c
 +++ b/hw/virtio/vhost-backend.c
 @@ -25,9 +25,18 @@ static int vhost_kernel_call(struct vhost_dev *dev, 
 unsigned long int request,
  
  int vhost_call(struct vhost_dev *dev, unsigned long int request, void *arg)
  {
 -int result;
 +int result = -1;
  
 -result = vhost_kernel_call(dev, request, arg);
 +switch (dev-backend_type) {
 +case VHOST_BACKEND_TYPE_KERNEL:
 +result = vhost_kernel_call(dev, request, arg);
 +break;
 +case VHOST_BACKEND_TYPE_USER:
 +fprintf(stderr, vhost-user not implemented\n);
 +break;
 +default:
 +fprintf(stderr, Unknown vhost backend type\n);
 +}

The switch statement approach gets messy fast when local variables are
needed inside some case labels.  It also makes it hard to conditionally
compile features without using #ifdef.

Perhaps instead a VhostOps struct could be used:

/* Vhost backends implement this interface */
typedef struct {
int vhost_call(struct vhost_dev *dev,
   unsigned long int request,
   void *arg);
...
} VhostOps;

const VhostOps vhost_kernel_ops = {
...
};

const VhostOps vhost_user_opts = {
...
};

ret = dev-vhost_ops-vhost_call(dev, request, arg);

Something along those lines.  It keeps the different backend
implementations separate (they can live in separate files and be
conditional in Makefile.objs).

Re: [Qemu-devel] [RFC 0/5] Vhost and vhost-net support for userspace based backends

2013-12-04 Thread Stefan Hajnoczi

On Fri, Nov 29, 2013 at 08:52:21PM +0100, Antonios Motakis wrote:
 In this patch series we would like to introduce our approach for putting a
 virtio-net backend in an external userspace process. Our eventual target is to
 run the network backend in the Snabbswitch ethernet switch, while receiving
 traffic from a guest inside QEMU/KVM which runs an unmodified virtio-net
 implementation.
 
 For this, we are working into extending vhost to allow equivalent 
 functionality
 for userspace. Vhost already passes control of the data plane of virtio-net to
 the host kernel; we want to realize a similar model, but for userspace.
 
 In this patch series the concept of a vhost-backend is introduced.
 
 We define two vhost backend types - vhost-kernel and vhost-user. The former is
 the interface to the current kernel module implementation. Its control plane 
 is
 ioctl based. The data plane is the kernel directly accessing the QEMU 
 allocated,
 guest memory.
 
 In the new vhost-user backend, the control plane is based on communication
 between QEMU and another userspace process using a unix domain socket. This
 allows to implement a virtio backend for a guest running in QEMU, inside the
 other userspace process.

One thing that came to mind when reading the patches is that you are
implementing the vhost interface pretty much exactly as-is.  Did you
look at FUSE's character devices in userspace (CUSE)?  IIRC even ioctl
is supported so you might be able to skip the userspace backend entirely
if you mimic vhost_net.ko's ioctl interface.

Then all that's needed is some configuration/startup code to use shared
memory and pass the eventfds.

Stefan

Re: [Qemu-devel] [PATCH 3/4] dataplane: change vring API to use VirtQueueElement

2013-12-04 Thread Stefan Hajnoczi

On Thu, Oct 10, 2013 at 05:07:18PM +0200, Paolo Bonzini wrote:
 @@ -298,30 +278,31 @@ static void handle_notify(EventNotifier *e)
  vring_disable_notification(s-vdev, s-vring);
  
  for (;;) {
 -head = vring_pop(s-vdev, s-vring, iov, end, out_num, 
 in_num);
 -if (head  0) {
 +ret = vring_pop(s-vdev, s-vring, elem);
 +if (ret  0) {
 +assert(elem == NULL);
  break; /* no more requests */
  }
  
 -trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
 -head);
 +trace_virtio_blk_data_plane_process_request(s, elem-out_num,
 +elem-in_num, 
 elem-index);
  
 -if (process_request(s-ioqueue, iov, out_num, in_num, head)  
 0) {
 +if (process_request(s-ioqueue, elem)  0) {
  vring_set_broken(s-vring);
 +vring_push(s-vring, elem, 0);

If we give up on the vring I don't think we should push the element
back.  It may cause the guest to panic.

I guess what we really need here is to unmap scatter-gather buffers and
delete elem.

Stefan

Re: [Qemu-devel] [PATCH 0/4] dataplane: use more of the generic virtio data structures, drop hostmem

2013-12-04 Thread Stefan Hajnoczi

On Thu, Oct 10, 2013 at 05:07:15PM +0200, Paolo Bonzini wrote:
 Now that the memory API is thread-safe, we can use it in
 virtio-blk-dataplane and replace hostmem.[ch].  This series does this,
 and also changes the vring API to use VirtQueueElement (with an eye
 towards migration).  With this change, virtio-blk-dataplane is also safe
 against memory hot-unplug.
 
 The next step would be to replace memory_region_find with
 address_space_{map,unmap}, which handle dirtying of memory correctly.
 However these APIs are not thread-safe yet, and neither is the handling
 of dirty memory (Juan's patches may be a start here).
 
 Also, the usage of iov_discard_{front,back} may cause some complication
 when we use address_space_{map,unmap}.  We may have to change a bit the
 logic in virtio-blk-dataplane to switch to address_space_{map,unmap}.
 
 If we do not want to do this intermediate step, the first three patches
 can be applied separately from the fourth.
 
 Paolo Bonzini (4):
   vring: create a common function to parse descriptors
   vring: factor common code for error exits
   dataplane: change vring API to use VirtQueueElement
   dataplane: replace hostmem with memory_region_find
 
  hw/block/dataplane/virtio-blk.c   |  86 +---
  hw/virtio/dataplane/Makefile.objs |   2 +-
  hw/virtio/dataplane/hostmem.c | 183 -
  hw/virtio/dataplane/vring.c   | 244 
 +-
  include/hw/virtio/dataplane/hostmem.h |  58 
  include/hw/virtio/dataplane/vring.h   |   9 +-
  6 files changed, 193 insertions(+), 389 deletions(-)
  delete mode 100644 hw/virtio/dataplane/hostmem.c
  delete mode 100644 include/hw/virtio/dataplane/hostmem.h

Finally looked into this series.  I think we should merge all patches so
we start exercising the thread-safe memory API as soon as possible in
the QEMU 2.0 release cycle.

Before merging I wanted to discuss the vring-broken case which I've
left a comment on.

Stefan

Re: [Qemu-devel] [PATCH 3/5] Add vhostsock option

2013-12-04 Thread Eric Blake

On 11/29/2013 12:52 PM, Antonios Motakis wrote:
 Adding a new tap network backend option - vhostsock. It points
 to a named unix domain socket, that will be used to communicate
 with vhost-user backend. This is a temporary work around; in future
 versions of this series vhost-user will be made independent from
 the tap network backend.
 

 +++ b/qapi-schema.json
 @@ -2891,6 +2891,8 @@
  #
  # @vhostforce: #optional vhost on for non-MSIX virtio guests
  #
 +# @vhostsock: #optional vhost backend socket
 +#

Needs a (since 2.0) designation.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH 04/32] tests: Fix schema parser test for in-tree build

2013-12-04 Thread Michael Roth

From: Markus Armbruster arm...@redhat.com

Commit 4f193e3 added the test, but screwed up in-tree builds
(SRCDIR=.): the tests's output overwrites the expected output, and is
thus compared to itself.

Cc: qemu-sta...@nongnu.org
Reported-by: Laszlo Ersek ler...@redhat.com
Reviewed-by: Andreas Färber afaer...@suse.de
Reviewed-by: Laszlo Ersek ler...@redhat.com
Signed-off-by: Markus Armbruster arm...@redhat.com
Signed-off-by: Michael Tokarev m...@tls.msk.ru
(cherry picked from commit d8039e58b1ecfdc9af171502c83e3949f6dafb95)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 tests/.gitignore |1 +
 tests/Makefile   |8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/.gitignore b/tests/.gitignore
index fb05c2a..d9c2ef4 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -19,3 +19,4 @@ test-thread-pool
 test-x86-cpuid
 test-xbzrle
 *-test
+qapi-schema/*.test.*
diff --git a/tests/Makefile b/tests/Makefile
index d044908..ad98439 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -247,10 +247,10 @@ check-tests/test-qapi.py: tests/test-qapi.py
 
 .PHONY: $(patsubst %, check-%, $(check-qapi-schema-y))
 $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: 
$(SRC_PATH)/%.json
-   $(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts $(PYTHON) 
$(SRC_PATH)/tests/qapi-schema/test-qapi.py $^ $*.out 2$*.err; echo $$? 
$*.exit,   TEST  $*.out)
-   @diff -q $(SRC_PATH)/$*.out $*.out
-   @diff -q $(SRC_PATH)/$*.err $*.err
-   @diff -q $(SRC_PATH)/$*.exit $*.exit
+   $(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts $(PYTHON) 
$(SRC_PATH)/tests/qapi-schema/test-qapi.py $^ $*.test.out 2$*.test.err; echo 
$$? $*.test.exit,   TEST  $*.out)
+   @diff -q $(SRC_PATH)/$*.out $*.test.out
+   @diff -q $(SRC_PATH)/$*.err $*.test.err
+   @diff -q $(SRC_PATH)/$*.exit $*.test.exit
 
 # Consolidated targets
 
-- 
1.7.9.5

[Qemu-devel] [PATCH 05/32] tests: Update .gitignore for test-int128 and test-bitops

2013-12-04 Thread Michael Roth

From: Markus Armbruster arm...@redhat.com

Forgotten in commit 6046c62 and 3464700.

Cc: qemu-sta...@nongnu.org
Reviewed-by: Andreas Färber afaer...@suse.de
Reviewed-by: Laszlo Ersek ler...@redhat.com
Signed-off-by: Markus Armbruster arm...@redhat.com
Signed-off-by: Michael Tokarev m...@tls.msk.ru
(cherry picked from commit 9dbb52e862458935c250bac9e71d5a87da4e33e9)

Conflicts:

tests/.gitignore

*removed post-1.6 additions from diff

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 tests/.gitignore |2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/.gitignore b/tests/.gitignore
index d9c2ef4..9ac044d 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -5,8 +5,10 @@ check-qjson
 check-qlist
 check-qstring
 test-aio
+test-bitops
 test-cutils
 test-hbitmap
+test-int128
 test-iov
 test-mul64
 test-qapi-types.[ch]
-- 
1.7.9.5

[Qemu-devel] Patch Round-up for stable 1.6.2, freeze on 2013-12-06

2013-12-04 Thread Michael Roth

Hi everyone,

The following new patches are queued for QEMU stable v1.6.2:

https://github.com/mdroth/qemu/commits/stable-1.6-staging

The release is planned for 2013-12-09:

http://wiki.qemu.org/Planning/1.6

Please respond here or CC qemu-sta...@nongnu.org on any patches you
think should be included in the release. The cut-off date is
has been extended to 2013-12-06 due to the round-up email going
out late.

Testing/feedback is greatly appreciated.

Thanks!

Alex Williamson (1):
  vfio-pci: Fix multifunction=on

Alexey Kardashevskiy (1):
  memory: fix 128 arithmetic in info mtree

Amit Shah (3):
  char: move backends' io watch tag to CharDriverState
  char: use common function to disable callbacks on chardev close
  char: remove watch callback on chardev detach from frontend

Amos Kong (2):
  virtio-net: fix the memory leak in rxfilter_notify()
  rng-egd: offset the point when repeatedly read from the buffer

Bandan Das (1):
  pci: unregister vmstate_pcibus on unplug

Cole Robinson (1):
  Fix pc migration from qemu = 1.5

Fam Zheng (1):
  vmdk: Fix vmdk_parse_extents

Hans de Goede (1):
  audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* 
second

Igor Mammedov (1):
  qdev-monitor: Fix crash when device_add is called with abstract driver

Jason Wang (1):
  virtio-net: only delete bh that existed

Markus Armbruster (2):
  tests: Fix schema parser test for in-tree build
  tests: Update .gitignore for test-int128 and test-bitops

Matthew Daley (1):
  xen_disk: mark ioreq as mapped before unmapping in error case

Max Filippov (1):
  exec: fix breakpoint_invalidate when pc may not be translated

Max Reitz (1):
  qcow2: count_contiguous_clusters and compression

Mike Frysinger (1):
  configure: detect endian via compile test

Paolo Bonzini (1):
  monitor: eliminate monitor_event_state_lock

Peter Lieven (1):
  qcow2: fix possible corruption when reading multiple clusters

Peter Maydell (1):
  configure: Explicitly set ARFLAGS so we can build with GNU Make 4.0

Richard Henderson (1):
  Adjust qapi-visit for python-2.4.3

Stefan Hajnoczi (1):
  qdev-monitor: Unref device when device_add fails

Stefan Weil (5):
  tci: Add implementation of rotl_i64, rotr_i64
  bitops: Add rotate functions (rol8, ror8, ...)
  misc: Use new rotate functions
  qemu-char: Fix potential out of bounds access to local arrays
  linux-user: Fix stat64 syscall for SPARC64

Vlad Yasevich (1):
  qom: Fix memory leak in object_property_set_link()

Wenchao Xia (2):
  qapi: fix memleak by adding implict struct functions in dealloc visitor
  tests: fix memleak in error path test for input visitor

 audio/audio.c  |3 +-
 backends/rng-egd.c |4 +-
 block/qcow2-cluster.c  |7 +++-
 block/vmdk.c   |7 +++-
 configure  |   45 +
 exec.c |6 ++-
 hw/block/xen_disk.c|1 +
 hw/misc/vfio.c |7 
 hw/net/virtio-net.c|   10 ++---
 hw/pci-host/piix.c |9 -
 hw/pci-host/q35.c  |   10 -
 hw/pci/pci.c   |8 
 include/hw/i386/pc.h   |8 
 include/hw/pci-host/q35.h  |1 +
 include/qemu/bitops.h  |   80 +
 include/sysemu/char.h  |1 +
 linux-user/syscall.c   |6 +--
 linux-user/syscall_defs.h  |   14 +++
 memory.c   |4 +-
 monitor.c  |6 ---
 qapi/qapi-dealloc-visitor.c|   20 ++
 qdev-monitor.c |8 
 qemu-char.c|   86 +++-
 qom/object.c   |5 ++-
 scripts/qapi-visit.py  |   17 ++--
 target-arm/iwmmxt_helper.c |2 +-
 tcg/optimize.c |   12 ++
 tcg/tci/tcg-target.c   |1 -
 tci.c  |   14 +--
 tests/.gitignore   |3 ++
 tests/Makefile |8 ++--
 tests/test-qmp-input-visitor.c |1 +
 32 files changed, 287 insertions(+), 127 deletions(-)

[Qemu-devel] [PATCH 07/32] bitops: Add rotate functions (rol8, ror8, ...)

2013-12-04 Thread Michael Roth

From: Stefan Weil s...@weilnetz.de

These functions were copies from include/linux/bitopts.h.

Signed-off-by: Stefan Weil s...@weilnetz.de
Reviewed-by: Richard Henderson r...@twiddle.net
(cherry picked from commit 6aa25b4a7bb10c48c3054f268d5be98e42ea42c0)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 include/qemu/bitops.h |   80 +
 1 file changed, 80 insertions(+)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 06e2e6f..304c90c 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -184,6 +184,86 @@ static inline unsigned long hweight_long(unsigned long w)
 }
 
 /**
+ * rol8 - rotate an 8-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint8_t rol8(uint8_t word, unsigned int shift)
+{
+return (word  shift) | (word  (8 - shift));
+}
+
+/**
+ * ror8 - rotate an 8-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint8_t ror8(uint8_t word, unsigned int shift)
+{
+return (word  shift) | (word  (8 - shift));
+}
+
+/**
+ * rol16 - rotate a 16-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint16_t rol16(uint16_t word, unsigned int shift)
+{
+return (word  shift) | (word  (16 - shift));
+}
+
+/**
+ * ror16 - rotate a 16-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint16_t ror16(uint16_t word, unsigned int shift)
+{
+return (word  shift) | (word  (16 - shift));
+}
+
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint32_t rol32(uint32_t word, unsigned int shift)
+{
+return (word  shift) | (word  (32 - shift));
+}
+
+/**
+ * ror32 - rotate a 32-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint32_t ror32(uint32_t word, unsigned int shift)
+{
+return (word  shift) | (word  (32 - shift));
+}
+
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint64_t rol64(uint64_t word, unsigned int shift)
+{
+return (word  shift) | (word  (64 - shift));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline uint64_t ror64(uint64_t word, unsigned int shift)
+{
+return (word  shift) | (word  (64 - shift));
+}
+
+/**
  * extract32:
  * @value: the value to extract the bit field from
  * @start: the lowest bit in the bit field (numbered from 0)
-- 
1.7.9.5

[Qemu-devel] [PATCH 01/32] char: move backends' io watch tag to CharDriverState

2013-12-04 Thread Michael Roth

From: Amit Shah amit.s...@redhat.com

All the backends implement an io watcher tag for callbacks.  Move it to
CharDriverState from each backend's struct to make accessing the tag from
backend-neutral functions easier.

This will be used later to cancel a callback on chardev detach from a
frontend.

CC: qemu-sta...@nongnu.org
Reviewed-by: Gerd Hoffmann kra...@redhat.com
Signed-off-by: Amit Shah amit.s...@redhat.com
(cherry picked from commit 7ba9addc165b37b764baa08c02518b15b2361707)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 include/sysemu/char.h |1 +
 qemu-char.c   |   77 +
 2 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 8053130..ad101d9 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -78,6 +78,7 @@ struct CharDriverState {
 int explicit_be_open;
 int avail_connections;
 int is_mux;
+guint fd_in_tag;
 QemuOpts *opts;
 QTAILQ_ENTRY(CharDriverState) next;
 };
diff --git a/qemu-char.c b/qemu-char.c
index 1621fbd..1dc1646 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -798,7 +798,6 @@ static int io_channel_send(GIOChannel *fd, const void *buf, 
size_t len)
 typedef struct FDCharDriver {
 CharDriverState *chr;
 GIOChannel *fd_in, *fd_out;
-guint fd_in_tag;
 int max_size;
 QTAILQ_ENTRY(FDCharDriver) node;
 } FDCharDriver;
@@ -830,9 +829,9 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition 
cond, void *opaque)
 status = g_io_channel_read_chars(chan, (gchar *)buf,
  len, bytes_read, NULL);
 if (status == G_IO_STATUS_EOF) {
-if (s-fd_in_tag) {
-io_remove_watch_poll(s-fd_in_tag);
-s-fd_in_tag = 0;
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
 }
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 return FALSE;
@@ -863,13 +862,14 @@ static void fd_chr_update_read_handler(CharDriverState 
*chr)
 {
 FDCharDriver *s = chr-opaque;
 
-if (s-fd_in_tag) {
-io_remove_watch_poll(s-fd_in_tag);
-s-fd_in_tag = 0;
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
 }
 
 if (s-fd_in) {
-s-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll, 
fd_chr_read, chr);
+chr-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll,
+   fd_chr_read, chr);
 }
 }
 
@@ -877,9 +877,9 @@ static void fd_chr_close(struct CharDriverState *chr)
 {
 FDCharDriver *s = chr-opaque;
 
-if (s-fd_in_tag) {
-io_remove_watch_poll(s-fd_in_tag);
-s-fd_in_tag = 0;
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
 }
 
 if (s-fd_in) {
@@ -1012,7 +1012,6 @@ static CharDriverState *qemu_chr_open_stdio(ChardevStdio 
*opts)
 
 typedef struct {
 GIOChannel *fd;
-guint fd_tag;
 int connected;
 int read_bytes;
 guint timer_tag;
@@ -1123,9 +1122,9 @@ static void pty_chr_state(CharDriverState *chr, int 
connected)
 PtyCharDriver *s = chr-opaque;
 
 if (!connected) {
-if (s-fd_tag) {
-io_remove_watch_poll(s-fd_tag);
-s-fd_tag = 0;
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
 }
 s-connected = 0;
 /* (re-)connect poll interval for idle guests: once per second.
@@ -1140,7 +1139,8 @@ static void pty_chr_state(CharDriverState *chr, int 
connected)
 if (!s-connected) {
 s-connected = 1;
 qemu_chr_be_generic_open(chr);
-s-fd_tag = io_add_watch_poll(s-fd, pty_chr_read_poll, 
pty_chr_read, chr);
+chr-fd_in_tag = io_add_watch_poll(s-fd, pty_chr_read_poll,
+   pty_chr_read, chr);
 }
 }
 }
@@ -1151,9 +1151,9 @@ static void pty_chr_close(struct CharDriverState *chr)
 PtyCharDriver *s = chr-opaque;
 int fd;
 
-if (s-fd_tag) {
-io_remove_watch_poll(s-fd_tag);
-s-fd_tag = 0;
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
 }
 fd = g_io_channel_unix_get_fd(s-fd);
 g_io_channel_unref(s-fd);
@@ -2161,7 +2161,6 @@ static CharDriverState *qemu_chr_open_stdio(ChardevStdio 
*opts)
 typedef struct {
 int fd;
 GIOChannel *chan;
-guint tag;
 uint8_t buf[READ_BUF_LEN];
 int bufcnt;
 int bufptr;
@@ -2217,9 +2216,9 @@ static gboolean udp_chr_read(GIOChannel *chan, 
GIOCondition cond, void *opaque)
 s-bufcnt = bytes_read;
 s-bufptr = s-bufcnt;
 if (status != G_IO_STATUS_NORMAL) {
-if (s-tag) {
-io_remove_watch_poll(s-tag);
-s-tag = 0;
+if (chr-fd_in_tag) {
+

[Qemu-devel] [PATCH 06/32] tci: Add implementation of rotl_i64, rotr_i64

2013-12-04 Thread Michael Roth

From: Stefan Weil s...@weilnetz.de

It is used by qemu-ppc64 when running Debian's busybox-static.

Cc: qemu-stable qemu-sta...@nongnu.org
Signed-off-by: Stefan Weil s...@weilnetz.de
Reviewed-by: Richard Henderson r...@twiddle.net
(cherry picked from commit d285bf784b6234e994ce73c05c82c9fb6429df00)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 tcg/tci/tcg-target.c |1 -
 tci.c|   10 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index e118bc7..7f86a7f 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -677,7 +677,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const 
TCGArg *args,
 case INDEX_op_shl_i64:
 case INDEX_op_shr_i64:
 case INDEX_op_sar_i64:
-/* TODO: Implementation of rotl_i64, rotr_i64 missing in tci.c. */
 case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
 case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
 tcg_out_r(s, args[0]);
diff --git a/tci.c b/tci.c
index af58576..b09ad25 100644
--- a/tci.c
+++ b/tci.c
@@ -952,8 +952,16 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, 
uint8_t *tb_ptr)
 break;
 #if TCG_TARGET_HAS_rot_i64
 case INDEX_op_rotl_i64:
+t0 = *tb_ptr++;
+t1 = tci_read_ri64(tb_ptr);
+t2 = tci_read_ri64(tb_ptr);
+tci_write_reg64(t0, (t1  t2) | (t1  (64 - t2)));
+break;
 case INDEX_op_rotr_i64:
-TODO();
+t0 = *tb_ptr++;
+t1 = tci_read_ri64(tb_ptr);
+t2 = tci_read_ri64(tb_ptr);
+tci_write_reg64(t0, (t1  t2) | (t1  (64 - t2)));
 break;
 #endif
 #if TCG_TARGET_HAS_deposit_i64
-- 
1.7.9.5

[Qemu-devel] [PATCH 02/32] char: use common function to disable callbacks on chardev close

2013-12-04 Thread Michael Roth

From: Amit Shah amit.s...@redhat.com

This deduplicates code used a lot of times.

CC: qemu-sta...@nongnu.org
Reviewed-by: Gerd Hoffmann kra...@redhat.com
Signed-off-by: Amit Shah amit.s...@redhat.com
(cherry picked from commit 26da70c72524eb22c946ab19ec98a217b8252f7e)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 qemu-char.c |   62 ++-
 1 file changed, 19 insertions(+), 43 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 1dc1646..fa00517 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -725,6 +725,14 @@ static void io_remove_watch_poll(guint tag)
 g_source_destroy(iwp-parent);
 }
 
+static void remove_fd_in_watch(CharDriverState *chr)
+{
+if (chr-fd_in_tag) {
+io_remove_watch_poll(chr-fd_in_tag);
+chr-fd_in_tag = 0;
+}
+}
+
 #ifndef _WIN32
 static GIOChannel *io_channel_from_fd(int fd)
 {
@@ -829,10 +837,7 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition 
cond, void *opaque)
 status = g_io_channel_read_chars(chan, (gchar *)buf,
  len, bytes_read, NULL);
 if (status == G_IO_STATUS_EOF) {
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 return FALSE;
 }
@@ -862,11 +867,7 @@ static void fd_chr_update_read_handler(CharDriverState 
*chr)
 {
 FDCharDriver *s = chr-opaque;
 
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
-
+remove_fd_in_watch(chr);
 if (s-fd_in) {
 chr-fd_in_tag = io_add_watch_poll(s-fd_in, fd_chr_read_poll,
fd_chr_read, chr);
@@ -877,11 +878,7 @@ static void fd_chr_close(struct CharDriverState *chr)
 {
 FDCharDriver *s = chr-opaque;
 
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
-
+remove_fd_in_watch(chr);
 if (s-fd_in) {
 g_io_channel_unref(s-fd_in);
 }
@@ -1122,10 +1119,7 @@ static void pty_chr_state(CharDriverState *chr, int 
connected)
 PtyCharDriver *s = chr-opaque;
 
 if (!connected) {
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 s-connected = 0;
 /* (re-)connect poll interval for idle guests: once per second.
  * We check more frequently in case the guests sends data to
@@ -1151,10 +1145,7 @@ static void pty_chr_close(struct CharDriverState *chr)
 PtyCharDriver *s = chr-opaque;
 int fd;
 
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 fd = g_io_channel_unix_get_fd(s-fd);
 g_io_channel_unref(s-fd);
 close(fd);
@@ -2216,10 +2207,7 @@ static gboolean udp_chr_read(GIOChannel *chan, 
GIOCondition cond, void *opaque)
 s-bufcnt = bytes_read;
 s-bufptr = s-bufcnt;
 if (status != G_IO_STATUS_NORMAL) {
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 return FALSE;
 }
 
@@ -2237,11 +2225,7 @@ static void udp_chr_update_read_handler(CharDriverState 
*chr)
 {
 NetCharDriver *s = chr-opaque;
 
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
-
+remove_fd_in_watch(chr);
 if (s-chan) {
 chr-fd_in_tag = io_add_watch_poll(s-chan, udp_chr_read_poll,
udp_chr_read, chr);
@@ -2251,10 +2235,8 @@ static void udp_chr_update_read_handler(CharDriverState 
*chr)
 static void udp_chr_close(CharDriverState *chr)
 {
 NetCharDriver *s = chr-opaque;
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+
+remove_fd_in_watch(chr);
 if (s-chan) {
 g_io_channel_unref(s-chan);
 closesocket(s-fd);
@@ -2489,10 +2471,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, 
GIOCondition cond, void *opaque)
 if (s-listen_chan) {
 s-listen_tag = g_io_add_watch(s-listen_chan, G_IO_IN, 
tcp_chr_accept, chr);
 }
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 g_io_channel_unref(s-chan);
 s-chan = NULL;
 closesocket(s-fd);
@@ -2606,10 +2585,7 @@ static void tcp_chr_close(CharDriverState *chr)
 {
 TCPCharDriver *s = chr-opaque;
 if (s-fd = 0) {
-if (chr-fd_in_tag) {
-io_remove_watch_poll(chr-fd_in_tag);
-chr-fd_in_tag = 0;
-}
+remove_fd_in_watch(chr);
 if (s-chan) {

[Qemu-devel] [PATCH 08/32] misc: Use new rotate functions

2013-12-04 Thread Michael Roth

From: Stefan Weil s...@weilnetz.de

Signed-off-by: Stefan Weil s...@weilnetz.de
(cherry picked from commit 3df2b8fde949be86d8a78923c992fdd698d4ea4c)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 target-arm/iwmmxt_helper.c |2 +-
 tcg/optimize.c |   12 
 tci.c  |8 
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index 7953b53..e6cfa62 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -577,7 +577,7 @@ uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, 
uint32_t n)
 
 uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n)
 {
-x = (x  n) | (x  (64 - n));
+x = ror64(x, n);
 env-iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
 return x;
 }
diff --git a/tcg/optimize.c b/tcg/optimize.c
index b35868a..adb5258 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -236,20 +236,16 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg 
x, TCGArg y)
 return (int64_t)x  (int64_t)y;
 
 case INDEX_op_rotr_i32:
-x = ((uint32_t)x  (32 - y)) | ((uint32_t)x  y);
-return x;
+return ror32(x, y);
 
 case INDEX_op_rotr_i64:
-x = ((uint64_t)x  (64 - y)) | ((uint64_t)x  y);
-return x;
+return ror64(x, y);
 
 case INDEX_op_rotl_i32:
-x = ((uint32_t)x  y) | ((uint32_t)x  (32 - y));
-return x;
+return rol32(x, y);
 
 case INDEX_op_rotl_i64:
-x = ((uint64_t)x  y) | ((uint64_t)x  (64 - y));
-return x;
+return rol64(x, y);
 
 CASE_OP_32_64(not):
 return ~x;
diff --git a/tci.c b/tci.c
index b09ad25..53c4b66 100644
--- a/tci.c
+++ b/tci.c
@@ -688,13 +688,13 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, 
uint8_t *tb_ptr)
 t0 = *tb_ptr++;
 t1 = tci_read_ri32(tb_ptr);
 t2 = tci_read_ri32(tb_ptr);
-tci_write_reg32(t0, (t1  t2) | (t1  (32 - t2)));
+tci_write_reg32(t0, rol32(t1, t2));
 break;
 case INDEX_op_rotr_i32:
 t0 = *tb_ptr++;
 t1 = tci_read_ri32(tb_ptr);
 t2 = tci_read_ri32(tb_ptr);
-tci_write_reg32(t0, (t1  t2) | (t1  (32 - t2)));
+tci_write_reg32(t0, ror32(t1, t2));
 break;
 #endif
 #if TCG_TARGET_HAS_deposit_i32
@@ -955,13 +955,13 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, 
uint8_t *tb_ptr)
 t0 = *tb_ptr++;
 t1 = tci_read_ri64(tb_ptr);
 t2 = tci_read_ri64(tb_ptr);
-tci_write_reg64(t0, (t1  t2) | (t1  (64 - t2)));
+tci_write_reg64(t0, rol64(t1, t2));
 break;
 case INDEX_op_rotr_i64:
 t0 = *tb_ptr++;
 t1 = tci_read_ri64(tb_ptr);
 t2 = tci_read_ri64(tb_ptr);
-tci_write_reg64(t0, (t1  t2) | (t1  (64 - t2)));
+tci_write_reg64(t0, ror64(t1, t2));
 break;
 #endif
 #if TCG_TARGET_HAS_deposit_i64
-- 
1.7.9.5

[Qemu-devel] [PATCH 32/32] rng-egd: offset the point when repeatedly read from the buffer

2013-12-04 Thread Michael Roth

From: Amos Kong ak...@redhat.com

The buffer content might be read out more than once, currently
we just repeatedly read the first data block, buffer offset is
missing.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Amos Kong ak...@redhat.com
Message-id: 1385023371-8198-3-git-send-email-ak...@redhat.com
Signed-off-by: Anthony Liguori aligu...@amazon.com
(cherry picked from commit 1eb1bd9eafa890f1f4d16ef5cb8b9239a86874d9)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 backends/rng-egd.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 9e5a536..2962795 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -91,12 +91,14 @@ static int rng_egd_chr_can_read(void *opaque)
 static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
 {
 RngEgd *s = RNG_EGD(opaque);
+size_t buf_offset = 0;
 
 while (size  0  s-requests) {
 RngRequest *req = s-requests-data;
 int len = MIN(size, req-size - req-offset);
 
-memcpy(req-data + req-offset, buf, len);
+memcpy(req-data + req-offset, buf + buf_offset, len);
+buf_offset += len;
 req-offset += len;
 size -= len;
 
-- 
1.7.9.5

[Qemu-devel] [PATCH 12/32] audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every nano second

2013-12-04 Thread Michael Roth

From: Hans de Goede hdego...@redhat.com

Now that we no longer have MIN_REARM_TIMER_NS a bug in the audio subsys has
clearly shown it self by trying to make a timer fire every nano second.

Note we have a similar problem in 1.6, 1.5 and older but there
MIN_REARM_TIMER_NS limits the wakeups caused by audio being active to
4000 times / second. This still causes a host cpu load of 50 % for simply
playing audio, where as with this patch git master is at 13%, so we should
backport this to 1.5 and 1.6 too.

Note this will not apply to 1.5 and 1.6 as is.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Hans de Goede hdego...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
(cherry picked from commit b4350deed67b95651896ddb60cf9f765093a4848)

Conflicts:

audio/audio.c

*fixed to reflect 1.6 timer function/clock names

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 audio/audio.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/audio/audio.c b/audio/audio.c
index 02bb886..bcd41a9 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1124,7 +1124,8 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
 if (audio_is_timer_needed ()) {
-qemu_mod_timer (s-ts, qemu_get_clock_ns (vm_clock) + 1);
+qemu_mod_timer (s-ts,
+qemu_get_clock_ns(vm_clock) + conf.period.ticks);
 }
 else {
 qemu_del_timer (s-ts);
-- 
1.7.9.5

[Qemu-devel] [PATCH 10/32] xen_disk: mark ioreq as mapped before unmapping in error case

2013-12-04 Thread Michael Roth

From: Matthew Daley mat...@gmail.com

Commit 4472beae modified the semantics of ioreq_{un,}map so that they are
idempotent if called when they're not needed (ie., twice in a row). However,
it neglected to handle the case where batch mapping is not being used (the
default), and one of the grants fails to map. In this case, ioreq_unmap will
be called to unwind and unmap any mappings already performed, but ioreq_unmap
simply returns due to the aforementioned change (the ioreq has not already
been marked as mapped).

The frontend user can therefore force xen_disk to leak grant mappings, a
per-domain limited resource.

Fix by marking the ioreq as mapped before calling ioreq_unmap in this
situation.

Signed-off-by: Matthew Daley mat...@gmail.com
Signed-off-by: Stefano Stabellini stefano.stabell...@eu.citrix.com
(cherry picked from commit a76f48e53382e6f039db6278443e3ce437653302)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 hw/block/xen_disk.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 727f433..03e30d7 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -405,6 +405,7 @@ static int ioreq_map(struct ioreq *ioreq)
 xen_be_printf(ioreq-blkdev-xendev, 0,
   can't map grant ref %d (%s, %d maps)\n,
   refs[i], strerror(errno), 
ioreq-blkdev-cnt_map);
+ioreq-mapped = 1;
 ioreq_unmap(ioreq);
 return -1;
 }
-- 
1.7.9.5

[Qemu-devel] [PATCH 30/32] qdev-monitor: Unref device when device_add fails

2013-12-04 Thread Michael Roth

From: Stefan Hajnoczi stefa...@redhat.com

qdev_device_add() leaks the created device upon failure.  I suspect this
problem crept in because qdev_free() unparents the device but does not
drop a reference - confusing name.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Stefan Hajnoczi stefa...@redhat.com
Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Andreas Färber afaer...@suse.de
(cherry picked from commit ee6abeb6ec08473713848ce9028110f1684853b7)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 qdev-monitor.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/qdev-monitor.c b/qdev-monitor.c
index bb2e1b6..1b2c606 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -518,6 +518,7 @@ DeviceState *qdev_device_add(QemuOpts *opts)
 }
 if (qemu_opt_foreach(opts, set_property, qdev, 1) != 0) {
 qdev_free(qdev);
+object_unref(OBJECT(qdev));
 return NULL;
 }
 if (qdev-id) {
@@ -531,6 +532,7 @@ DeviceState *qdev_device_add(QemuOpts *opts)
 g_free(name);
 }
 if (qdev_init(qdev)  0) {
+object_unref(OBJECT(qdev));
 qerror_report(QERR_DEVICE_INIT_FAILED, driver);
 return NULL;
 }
-- 
1.7.9.5

[Qemu-devel] [PATCH 31/32] pci: unregister vmstate_pcibus on unplug

2013-12-04 Thread Michael Roth

From: Bandan Das b...@redhat.com

PCIBus registers a vmstate during init. Unregister it upon
removal/unplug.

Signed-off-by: Bandan Das b...@redhat.com
Cc: qemu-sta...@nongnu.org
Reviewed-by: Andreas Färber afaer...@suse.de
Signed-off-by: Michael S. Tsirkin m...@redhat.com
(cherry picked from commit 5c397242d5d53c1adecce31817bb439383cf8228)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 hw/pci/pci.c |8 
 1 file changed, 8 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4c004f5..dc5b788 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -47,6 +47,7 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, 
int indent);
 static char *pcibus_get_dev_path(DeviceState *dev);
 static char *pcibus_get_fw_dev_path(DeviceState *dev);
 static int pcibus_reset(BusState *qbus);
+static void pci_bus_finalize(Object *obj);
 
 static Property pci_props[] = {
 DEFINE_PROP_PCI_DEVFN(addr, PCIDevice, devfn, -1),
@@ -73,6 +74,7 @@ static const TypeInfo pci_bus_info = {
 .name = TYPE_PCI_BUS,
 .parent = TYPE_BUS,
 .instance_size = sizeof(PCIBus),
+.instance_finalize = pci_bus_finalize,
 .class_init = pci_bus_class_init,
 };
 
@@ -375,6 +377,12 @@ int pci_bus_num(PCIBus *s)
 return s-parent_dev-config[PCI_SECONDARY_BUS];
 }
 
+static void pci_bus_finalize(Object *obj)
+{
+PCIBus *bus = PCI_BUS(obj);
+vmstate_unregister(NULL, vmstate_pcibus, bus);
+}
+
 static int get_pci_config_device(QEMUFile *f, void *pv, size_t size)
 {
 PCIDevice *s = container_of(pv, PCIDevice, config);
-- 
1.7.9.5

[Qemu-devel] [PATCH 26/32] vfio-pci: Fix multifunction=on

2013-12-04 Thread Michael Roth

From: Alex Williamson alex.william...@redhat.com

When an assigned device is initialized it copies the device config
space into the emulated config space.  Unfortunately multifunction is
setup prior to the device initfn and gets clobbered.  We need to
restore it just like pci-assign does.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Alex Williamson alex.william...@redhat.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
(cherry picked from commit 8d07d6c46597a885eb38d99cc6fff399ce69cd21)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 hw/misc/vfio.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 017e693..d9e78e1 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -3142,6 +3142,13 @@ static int vfio_initfn(PCIDevice *pdev)
 vdev-emulated_config_bits[PCI_HEADER_TYPE] =
   PCI_HEADER_TYPE_MULTI_FUNCTION;
 
+/* Restore or clear multifunction, this is always controlled by QEMU */
+if (vdev-pdev.cap_present  QEMU_PCI_CAP_MULTIFUNCTION) {
+vdev-pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+} else {
+vdev-pdev.config[PCI_HEADER_TYPE] = ~PCI_HEADER_TYPE_MULTI_FUNCTION;
+}
+
 /*
  * Clear host resource mapping info.  If we choose not to register a
  * BAR, such as might be the case with the option ROM, we can get
-- 
1.7.9.5

[Qemu-devel] [PATCH 29/32] qdev-monitor: Fix crash when device_add is called with abstract driver

2013-12-04 Thread Michael Roth

From: Igor Mammedov imamm...@redhat.com

User is able to crash running QEMU when following monitor
command is called:

 device_add intel-hda-generic

Crash is caused by assertion in object_initialize_with_type()
when type is abstract.

Checking if type is abstract before instance is created in
qdev_device_add() allows to prevent crash on incorrect user input.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Igor Mammedov imamm...@redhat.com
Signed-off-by: Andreas Färber afaer...@suse.de
(cherry picked from commit 2fa4e56d88aa0039062bbc7f9a88e9f90c77ed94)

Conflicts:

qdev-monitor.c

*updated to reflect different 1.6 variable names

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 qdev-monitor.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/qdev-monitor.c b/qdev-monitor.c
index 410cdcb..bb2e1b6 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -472,6 +472,12 @@ DeviceState *qdev_device_add(QemuOpts *opts)
 return NULL;
 }
 
+if (object_class_is_abstract(obj)) {
+qerror_report(QERR_INVALID_PARAMETER_VALUE, driver,
+  non-abstract device type);
+return NULL;
+}
+
 k = DEVICE_CLASS(obj);
 
 /* find bus */
-- 
1.7.9.5

[Qemu-devel] [PATCH 28/32] qom: Fix memory leak in object_property_set_link()

2013-12-04 Thread Michael Roth

From: Vlad Yasevich vyase...@redhat.com

Save the result of the call to object_get_canonical_path()
so we can free it.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Vlad Yasevich vyase...@redhat.com
Reviewed-by: Amos Kong ak...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
Signed-off-by: Andreas Färber afaer...@suse.de
(cherry picked from commit 2d3aa28cc2cf382aa04cd577e0be542175eea9bd)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 qom/object.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qom/object.c b/qom/object.c
index b2479d1..333f807 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -823,8 +823,9 @@ char *object_property_get_str(Object *obj, const char *name,
 void object_property_set_link(Object *obj, Object *value,
   const char *name, Error **errp)
 {
-object_property_set_str(obj, object_get_canonical_path(value),
-name, errp);
+gchar *path = object_get_canonical_path(value);
+object_property_set_str(obj, path, name, errp);
+g_free(path);
 }
 
 Object *object_property_get_link(Object *obj, const char *name,
-- 
1.7.9.5

Re: [Qemu-devel] [RFC PATCH v1 0/5] Add error_abort and associated cleanups

2013-12-04 Thread Eric Blake

On 12/04/2013 02:11 AM, Markus Armbruster wrote:

 No objection to asserting that the caller passed an error object when
 the error object is the only way to signal failure.  You can't force
 your callers to check for failure, but the assertion could help prevent
 accidental misuse.
 
 Assertions fire at run-time, though.

Unfortunately true.

 
 Asserting argument not null first thing in the function should enable
 a sufficiently smart whole-program static checker to flag null
 arguments.

Coverity is such a checker; I think clang can as well.

 
 But having such a static check right at compile-time would be much
 better.  Could attribute nonnull do it?  If yes, do we still need the
 assertion?

gcc's implementation of attribute nonnull is complete trash.  And the
gcc developers know it.  The attribute is still useful for Coverity, but
at least in libvirt, we have taken to using the attribute ONLY when
compiling under a static checker and omitting it under gcc because gcc's
implementation of the attribute is so horribly botched.

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17308

So even with attribute nonnull, you still need the assertion.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH for-1.7] seccomp: setting -sandbox on by default

2013-12-04 Thread Corey Bryant




On 12/04/2013 08:21 AM, Eduardo Otubo wrote:



On 12/04/2013 07:39 AM, Stefan Hajnoczi wrote:

On Fri, Nov 22, 2013 at 11:00:24AM -0500, Paul Moore wrote:

Developers will only be happy with seccomp if it's easy and
rewarding to
support/debug.


Agreed.

As a developer, how do you feel about the audit/syslog based approach I
mentioned earlier?


I used the commands you posted (I think that's what you mean).  They
produce useful output.

The problem is that without an error message on stderr or from the
shell, no one will think QEMU process dead and hung == check seccomp
immediately.  It's frustrating to deal with a silent failure.


The process dies with a SIGKILL, and sig handling in Qemu is hard to
implement due to dozen of external linked libraries that has their own
signal masks and conflicts with seccomp. I've already tried this
approach in the past (you can find in the list by searching for debug mode)


And just to be clear, the signal handling approach was only for debug 
purposes.


There are basically three ways to fail a syscall with seccomp:

SECCOMP_RET_KILL - kernel kills the task immediately without executing 
syscall

SECCOMP_RET_TRAP - kernel sends SIGSYS to the task without executing syscall
SECCOMP_RET_ERRNO - kernel returns an errno to the task wtihout 
executing syscall


You could issue a better error messages if you used TRAP or ERRNO, but 
giving control back to QEMU after (presumably) arbitrary code is being 
executed sort of defeats the purpose.


--
Regards,
Corey Bryant



The optimal goal here is to use virt-test and audit log to eliminate
these sorts of things.

[Qemu-devel] [PATCH 03/32] char: remove watch callback on chardev detach from frontend

2013-12-04 Thread Michael Roth

From: Amit Shah amit.s...@redhat.com

If a frontend device releases the chardev (via unplug), the chr handlers
are set to NULL via qdev's exit callbacks invoking
qemu_chr_add_handlers().  If the chardev had a pending operation, a
callback will be invoked, which will try to access data in the
just-released frontend, causing a segfault.

Ensure the callbacks are disabled when frontends release chardevs.

This was seen when a virtio-serial port was unplugged when heavy
guest-host IO was in progress (causing a callback to be registered).
In the window in which the throttling was active, unplugging ports
caused a qemu segfault.

https://bugzilla.redhat.com/show_bug.cgi?id=985205

CC: qemu-sta...@nongnu.org
Reported-by: Sibiao Luo s...@redhat.com
Reviewed-by: Gerd Hoffmann kra...@redhat.com
Signed-off-by: Amit Shah amit.s...@redhat.com
(cherry picked from commit 386a5a1e0057e220f79c48fe3689e3dfb17f1b09)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 qemu-char.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/qemu-char.c b/qemu-char.c
index fa00517..fc1c23d 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -193,6 +193,8 @@ void qemu_chr_fe_printf(CharDriverState *s, const char 
*fmt, ...)
 va_end(ap);
 }
 
+static void remove_fd_in_watch(CharDriverState *chr);
+
 void qemu_chr_add_handlers(CharDriverState *s,
IOCanReadHandler *fd_can_read,
IOReadHandler *fd_read,
@@ -203,6 +205,7 @@ void qemu_chr_add_handlers(CharDriverState *s,
 
 if (!opaque  !fd_can_read  !fd_read  !fd_event) {
 fe_open = 0;
+remove_fd_in_watch(s);
 } else {
 fe_open = 1;
 }
-- 
1.7.9.5

Re: [Qemu-devel] [PATCH V7 0/6] export internal snapshot by qemu-nbd

2013-12-04 Thread Stefan Hajnoczi

On Wed, Dec 04, 2013 at 05:10:53PM +0800, Wenchao Xia wrote:
 This series allow user to read internal snapshot's contents without qemu-img
 convert.
 
 V2:
   Address Stefan's comments:
   02: add 'fall through' comments in the case statement.
   03: add doc about the difference of internal snapshot and backing chain
 snapshot, which is used in previous '--snapshot' parameter.
   Other:
   01,04: rebased on upstream with conflict resolved.
 
 v3:
   Address Paolo's comments:
   02: add parameter -l snapshot_id_or_name, rename options
 snapshot-load to load-snapshot, use QemuOpts.
   03: rename snapshot-load to load-snapshot.
   04: related change to test both -l and -L case.
   05-07: add similar parameter for qemu-img convert.
   Other:
   01: foldered original snapshot logic into function
 bdrv_snapshot_load_tmp_by_id_or_name(), since multiple
 caller present in this version. Refined error message from
 , reason: %s to : %s.
   02: Refined error message from , reason: %s to : %s.
   03: Rename PARAM to SNAPSHOT_PARAM.
 
 v4:
   Address Eric's comments:
   01: typo fix.
   02: squashed patch. Use only one option -l and distiguish
 the cases by the starting string.
   03: remove 'eval', remove '_supported_os Linux', remove the
 comments that have typo.
   04: squashed patch. Add only one option -l and distiguish
 the cases by the starting string.
   05: remove indentation.
   Address Paolo's comments:
   02: Use only one option -l and distiguish the cases by
 the starting string.
   04: Use only one option -l and distiguish the cases by
 the starting string, mark old -s as deprecated, added doc
 for them.
 
 v5:
   Address Jeff's comments:
   Quote image name in test cases.
 
 v6:
   Address Kevin's comments:
   1: typo fix, remove device and snapshot info in error message.
   2: use strstart().
   3: use _require_command(), limit proto to file, since when proto=nbd
 it can't work. also changed _require_command() to tip better.
   4: use strstart().
   6: new patch, since I found the doc missing in debugging.
 
 v7:
   Address Stefan's comments:
   3: use unix socket and wait for 30s, to make the case reliable, folder
   4: add missing doc change.
   Other:
   3,5: add label method 1/method 2 in echo message, to tip better.
 
 Wenchao Xia (6):
   1 snapshot: distinguish id and name in load_tmp
   2 qemu-nbd: support internal snapshot export
   3 qemu-iotests: add 058 internal snapshot export with qemu-nbd case
   4 qemu-img: add -l for snapshot in convert
   5 qemu-iotests: add test for snapshot in qemu-img convert
   6 qemu-nbd: add doc for option -f
 
  block/qcow2-snapshot.c   |   10 +++-
  block/qcow2.h|5 +-
  block/snapshot.c |   77 ++-
  include/block/block_int.h|4 +-
  include/block/snapshot.h |   15 -
  qemu-img-cmds.hx |4 +-
  qemu-img.c   |   44 +++--
  qemu-img.texi|   12 +++-
  qemu-nbd.c   |   47 ++-
  qemu-nbd.texi|   10 +++-
  tests/qemu-iotests/058   |  138 
 ++
  tests/qemu-iotests/058.out   |   44 +
  tests/qemu-iotests/check |1 +
  tests/qemu-iotests/common.rc |3 +-
  tests/qemu-iotests/group |1 +
  15 files changed, 390 insertions(+), 25 deletions(-)
  create mode 100755 tests/qemu-iotests/058
  create mode 100644 tests/qemu-iotests/058.out
 
 

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan

[Qemu-devel] [PATCH 13/32] monitor: eliminate monitor_event_state_lock

2013-12-04 Thread Michael Roth

From: Paolo Bonzini pbonz...@redhat.com

This lock does not protect anything that the BQL does not already
protect.  Furthermore, with -nodefaults and no monitor, the mutex
is not initialized but monitor_protocol_event_queue is called
anyway, which causes a crash under mingw (and only works by luck.
under Linux or other POSIX OSes).

Reported-by: Orx Goshen orx.gos...@intel.com
Cc: Daniel Berrange berra...@redhat.com
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Luiz Capitulino lcapitul...@redhat.com
(cherry picked from commit c20b7fa4b2fedd979bcb0cc974bb5d08a10e3448)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 monitor.c |6 --
 1 file changed, 6 deletions(-)

diff --git a/monitor.c b/monitor.c
index 5dc0aa9..99bfcd9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -508,7 +508,6 @@ static const char *monitor_event_names[] = {
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
 
 MonitorEventState monitor_event_state[QEVENT_MAX];
-QemuMutex monitor_event_state_lock;
 
 /*
  * Emits the event to every monitor instance
@@ -540,7 +539,6 @@ monitor_protocol_event_queue(MonitorEvent event,
 int64_t now = qemu_get_clock_ns(rt_clock);
 assert(event  QEVENT_MAX);
 
-qemu_mutex_lock(monitor_event_state_lock);
 evstate = (monitor_event_state[event]);
 trace_monitor_protocol_event_queue(event,
data,
@@ -573,7 +571,6 @@ monitor_protocol_event_queue(MonitorEvent event,
 evstate-last = now;
 }
 }
-qemu_mutex_unlock(monitor_event_state_lock);
 }
 
 
@@ -586,7 +583,6 @@ static void monitor_protocol_event_handler(void *opaque)
 MonitorEventState *evstate = opaque;
 int64_t now = qemu_get_clock_ns(rt_clock);
 
-qemu_mutex_lock(monitor_event_state_lock);
 
 trace_monitor_protocol_event_handler(evstate-event,
  evstate-data,
@@ -598,7 +594,6 @@ static void monitor_protocol_event_handler(void *opaque)
 evstate-data = NULL;
 }
 evstate-last = now;
-qemu_mutex_unlock(monitor_event_state_lock);
 }
 
 
@@ -635,7 +630,6 @@ monitor_protocol_event_throttle(MonitorEvent event,
  * and initialize state */
 static void monitor_protocol_event_init(void)
 {
-qemu_mutex_init(monitor_event_state_lock);
 /* Limit RTC  BALLOON events to 1 per second */
 monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
 monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
-- 
1.7.9.5

[Qemu-devel] [PATCH 27/32] virtio-net: fix the memory leak in rxfilter_notify()

2013-12-04 Thread Michael Roth

From: Amos Kong ak...@redhat.com

object_get_canonical_path() returns a gchar*, it should be freed by the
caller.

Signed-off-by: Amos Kong ak...@redhat.com
Reviewed-by: Michael S. Tsirkin m...@redhat.com
Reviewed-by: Vlad Yasevich vyase...@redhat.com
Reviewed-by: Andreas Färber afaer...@suse.de
Signed-off-by: Stefan Hajnoczi stefa...@redhat.com
(cherry picked from commit 96e35046e4a97df5b4e1e24e217eb1e1701c7c71)

Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
---
 hw/net/virtio-net.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index bb757b3..5320aab 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -200,16 +200,16 @@ static void rxfilter_notify(NetClientState *nc)
 VirtIONet *n = qemu_get_nic_opaque(nc);
 
 if (nc-rxfilter_notify_enabled) {
+gchar *path = object_get_canonical_path(OBJECT(n-qdev));
 if (n-netclient_name) {
 event_data = qobject_from_jsonf({ 'name': %s, 'path': %s },
-n-netclient_name,
-
object_get_canonical_path(OBJECT(n-qdev)));
+n-netclient_name, path);
 } else {
-event_data = qobject_from_jsonf({ 'path': %s },
-
object_get_canonical_path(OBJECT(n-qdev)));
+event_data = qobject_from_jsonf({ 'path': %s }, path);
 }
 monitor_protocol_event(QEVENT_NIC_RX_FILTER_CHANGED, event_data);
 qobject_decref(event_data);
+g_free(path);
 
 /* disable event notification to avoid events flooding */
 nc-rxfilter_notify_enabled = 0;
-- 
1.7.9.5

1 2 3 >

1 - 100 of 228 matches

Mail list logo