[Qemu-devel] [PATCH V6 5/6] blkdebug: add debug events for snapshot
Some code in qcow2-snapshot.c directly accesses bs-file, so in those places errors can't be injected by other events. Since the code in qcow2-snapshot.c is similar to the other qcow2 internal code (in regards to e.g. the L1 table), add some debug events. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com Reviewed-by: Max Reitz mre...@redhat.com --- block/blkdebug.c |4 block/qcow2-snapshot.c |3 +++ include/block/block.h |4 3 files changed, 11 insertions(+), 0 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 16d2b91..3d5f7cf 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -186,6 +186,10 @@ static const char *event_names[BLKDBG_EVENT_MAX] = { [BLKDBG_FLUSH_TO_OS]= flush_to_os, [BLKDBG_FLUSH_TO_DISK] = flush_to_disk, + +[BLKDBG_SNAPSHOT_L1_UPDATE] = snapshot_l1_update, +[BLKDBG_SNAPSHOT_LIST_UPDATE] = snapshot_list_update, +[BLKDBG_SNAPSHOT_HEADER_UPDATE] = snapshot_header_update, }; static int get_event_by_name(const char *name, BlkDebugEvent *event) diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 685ef8b..d2e5275 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -207,6 +207,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs, Error **errp) } +BLKDBG_EVENT(bs-file, BLKDBG_SNAPSHOT_LIST_UPDATE); /* Write all snapshots to the new list */ for(i = 0; i s-nb_snapshots; i++) { sn = s-snapshots + i; @@ -292,6 +293,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs, Error **errp) header_data.nb_snapshots= cpu_to_be32(s-nb_snapshots); header_data.snapshots_offset= cpu_to_be64(snapshots_offset); +BLKDBG_EVENT(bs-file, BLKDBG_SNAPSHOT_HEADER_UPDATE); ret = bdrv_pwrite_sync(bs-file, offsetof(QCowHeader, nb_snapshots), header_data, sizeof(header_data)); if (ret 0) { @@ -452,6 +454,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, goto dealloc_cluster; } +BLKDBG_EVENT(bs-file, BLKDBG_SNAPSHOT_L1_UPDATE); ret = bdrv_pwrite(bs-file, sn-l1_table_offset, l1_table, s-l1_size * sizeof(uint64_t)); if (ret 0) { diff --git a/include/block/block.h b/include/block/block.h index 3560deb..cbccc3d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -476,6 +476,10 @@ typedef enum { BLKDBG_FLUSH_TO_OS, BLKDBG_FLUSH_TO_DISK, +BLKDBG_SNAPSHOT_L1_UPDATE, +BLKDBG_SNAPSHOT_LIST_UPDATE, +BLKDBG_SNAPSHOT_HEADER_UPDATE, + BLKDBG_EVENT_MAX, } BlkDebugEvent; -- 1.7.1
[Qemu-devel] [PATCH V6 4/6] qcow2: cancel the modification on fail in qcow2_snapshot_create()
Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com Reviewed-by: Max Reitz mre...@redhat.com --- block/qcow2-snapshot.c | 25 + 1 files changed, 21 insertions(+), 4 deletions(-) diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 70e329e..685ef8b 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -400,6 +400,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, int i, ret; uint64_t *l1_table = NULL; int64_t l1_table_offset; +Error *err = NULL; memset(sn, 0, sizeof(*sn)); @@ -448,7 +449,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, PRIu64 with size % PRIu64, sn-l1_table_offset, (uint64_t)(s-l1_size * sizeof(uint64_t))); -goto fail; +goto dealloc_cluster; } ret = bdrv_pwrite(bs-file, sn-l1_table_offset, l1_table, @@ -459,7 +460,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, PRIu64 with size % PRIu64, sn-l1_table_offset, (uint64_t)(s-l1_size * sizeof(uint64_t))); -goto fail; +goto dealloc_cluster; } g_free(l1_table); @@ -476,7 +477,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, Failed in update of refcount for snapshot at % PRIu64 with size %d, s-l1_table_offset, s-l1_size); -goto fail; +goto dealloc_cluster; } /* Append the new snapshot to the snapshot list */ @@ -494,7 +495,7 @@ void qcow2_snapshot_create(BlockDriverState *bs, g_free(s-snapshots); s-snapshots = old_snapshot_list; s-nb_snapshots--; -goto fail; +goto restore_refcount; } g_free(old_snapshot_list); @@ -514,6 +515,22 @@ void qcow2_snapshot_create(BlockDriverState *bs, #endif return; +restore_refcount: +if (qcow2_update_snapshot_refcount(bs, s-l1_table_offset, s-l1_size, -1) + 0 errp) { +/* Nothing can be done now, need image check later */ +error_setg(err, %s\nqcow2: Error in restoring refcount in snapshot, + error_get_pretty(*errp)); +error_free(*errp); +*errp = NULL; +error_propagate(errp, err); +} + +dealloc_cluster: +qcow2_free_clusters(bs, sn-l1_table_offset, +sn-l1_size * sizeof(uint64_t), +QCOW2_DISCARD_ALWAYS); + fail: g_free(sn-id_str); g_free(sn-name); -- 1.7.1
[Qemu-devel] [PATCH V6 0/6] qcow2: rollback the modification on fail in snapshot creation
V2: 1: all fail case will goto fail section. 2: add the goto code. v3: Address Stefan's comments: 2: don't goto fail after allocation failure. 3: use sn-l1size correctly in qcow2_free_cluster(). 4-7: add test case to verify the error paths. Other: 1: new patch fix a existing bug, which will be exposed in error path test. v4: General change: rebased on upstream since error path for qcow2_write_snapshots() already exist in upstream. removed old patch 1 since it is fixed by Max in upstream. 5: moved the snapshot_l1_update event just before write operation, instead of before overlap check, since it is more straight. 6: remove a duplicated error path test about flush after snapshot list update, add a filter which replace number to X, since now in error in report detailed message including error cluster number. Address Stefan's comments: 1, 2, 4: add *errp to store detailed error message, instead of error_report() and compile time determined debug printf message. 3: do not free cluster when fail in header update for safety reason. Address Eric's comments: 1, 2, 4: add *errp to store detailed error message, instead of error_report() and compile time determined debug printf message. 5: squashed patches that add and use debug events. 6: added comments about test only on Linux. v5: General change: 6: rebased on upstream, use case number 070, adjust 070.out due to error message change in this version. Address Max's comments: 1 use error_setg_errno() when possible, remove ret = in functions when possible since the function does not need to return int value, fix 32bit/ 64bit issue in printf for sizeof and offse, typo fix. 2 use error_setg_errno() when possible, fix 32bit/64bit issue in printf for sizeof and offse, typo fix. 3 typo fix in comments. 5 typo fix in commit message. Address Eric's comments: 2 fix 32bit/64bit issue in printf for sizeof and offse. v6: Address Jeff's comments: 6: add quote for image name in test case. Wenchao Xia (6): 1 snapshot: add parameter *errp in snapshot create 2 qcow2: add error message in qcow2_write_snapshots() 3 qcow2: do not free clusters when fail in header update in qcow2_write_snapshots 4 qcow2: cancel the modification on fail in qcow2_snapshot_create() 5 blkdebug: add debug events for snapshot 6 qemu-iotests: add test for qcow2 snapshot block/blkdebug.c |4 + block/qcow2-snapshot.c | 105 --- block/qcow2.h|4 +- block/rbd.c | 19 ++-- block/sheepdog.c | 28 +++-- block/snapshot.c | 19 +++- blockdev.c | 10 +- include/block/block.h|4 + include/block/block_int.h|5 +- include/block/snapshot.h |5 +- qemu-img.c | 10 +- savevm.c | 12 ++- tests/qemu-iotests/070 | 216 ++ tests/qemu-iotests/070.out | 35 ++ tests/qemu-iotests/common.filter |7 ++ tests/qemu-iotests/group |1 + 16 files changed, 427 insertions(+), 57 deletions(-) create mode 100755 tests/qemu-iotests/070 create mode 100644 tests/qemu-iotests/070.out
[Qemu-devel] [PATCH v2 0/3] Device tree cleanups
From: Peter Crosthwaite peter.crosthwa...@xilinx.com Fix the name stem of the devicetree API (P1 - s/qemu_devtree/qemu_fdt) and cleanup error report (P3). Trivial patch P2 fixing an arugment name along the way. Tested using: 1: Alex's e500 test vector. 2: Xilinx Zynq (tests arm/boot.c). I have testing using Zynq with Mem 4gb and a bogus dts (size cells = 1) to give that particular error path some exercise. To give some exercise to the error paths, I hacked up my libfdt to throw errors randomly: --- a/libfdt/fdt_rw.c +++ b/libfdt/fdt_rw.c @@ -48,6 +48,8 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#include stdlib.h #include libfdt_env.h #include fdt.h @@ -279,6 +281,15 @@ int fdt_setprop(void *fdt, int nodeoffset, const char *name, FDT_RW_CHECK_HEADER(fdt); +static int seeded = 0; +if (!seeded) { +srand(time(NULL)); +seeded = 1; +} +if (!(rand() 0x7)) { +return -((rand() 0x3) + 1); +} + Some sample outputs from e500 boot (Using the above tainted libfdt): - qemu-system-ppc: qemu_fdt_setprop: Couldn't set /memory/reg: FDT_ERR_BADOFFSET Aborted - qemu-system-ppc: qemu_fdt_setprop_string: Couldn't set /soc@e000/device_type = soc: FDT_ERR_NOSPACE Aborted - qemu-system-ppc: qemu_fdt_setprop_cell: Couldn't set /soc@e000/#address-cells = 0x01: FDT_ERR_NOSPACE Aborted - Peter Crosthwaite (3): device_tree: s/qemu_devtree/qemu_fdt globally device_tree: qemu_fdt_setprop: Rename val_array arg device_tree: qemu_fdt_setprop: Fixup error reporting device_tree.c| 230 +++-- hw/arm/boot.c| 51 - hw/arm/vexpress.c| 23 +++-- hw/microblaze/boot.c | 17 ++- hw/ppc/e500.c| 241 ++- hw/ppc/e500.h| 3 +- hw/ppc/e500plat.c| 9 +- hw/ppc/mpc8544ds.c | 9 +- hw/ppc/ppc440_bamboo.c | 34 ++ hw/ppc/spapr_rtas.c | 40 ++- hw/ppc/virtex_ml507.c| 5 +- include/sysemu/device_tree.h | 219 ++- 12 files changed, 519 insertions(+), 362 deletions(-) -- 1.8.3.rc1.44.gb387c77.dirty
[Qemu-devel] [PATCH v2 1/3] device_tree: s/qemu_devtree/qemu_fdt globally
From: Peter Crosthwaite peter.crosthwa...@xilinx.com The qemu_devtree API is a wrapper around the fdt_ set of APIs. Rename accordingly. Signed-off-by: Peter Crosthwaite peter.crosthwa...@xilinx.com --- device_tree.c| 62 ++--- hw/arm/boot.c| 24 ++--- hw/arm/vexpress.c| 18 ++-- hw/microblaze/boot.c | 12 +-- hw/ppc/e500.c| 213 ++- hw/ppc/e500plat.c| 6 +- hw/ppc/mpc8544ds.c | 6 +- hw/ppc/ppc440_bamboo.c | 24 ++--- hw/ppc/spapr_rtas.c | 16 ++-- hw/ppc/virtex_ml507.c| 2 +- include/sysemu/device_tree.h | 80 11 files changed, 232 insertions(+), 231 deletions(-) diff --git a/device_tree.c b/device_tree.c index ffec99a..5a13d41 100644 --- a/device_tree.c +++ b/device_tree.c @@ -127,8 +127,8 @@ static int findnode_nofail(void *fdt, const char *node_path) return offset; } -int qemu_devtree_setprop(void *fdt, const char *node_path, - const char *property, const void *val_array, int size) +int qemu_fdt_setprop(void *fdt, const char *node_path, + const char *property, const void *val_array, int size) { int r; @@ -142,8 +142,8 @@ int qemu_devtree_setprop(void *fdt, const char *node_path, return r; } -int qemu_devtree_setprop_cell(void *fdt, const char *node_path, - const char *property, uint32_t val) +int qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val) { int r; @@ -157,15 +157,15 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path, return r; } -int qemu_devtree_setprop_u64(void *fdt, const char *node_path, - const char *property, uint64_t val) +int qemu_fdt_setprop_u64(void *fdt, const char *node_path, + const char *property, uint64_t val) { val = cpu_to_be64(val); -return qemu_devtree_setprop(fdt, node_path, property, val, sizeof(val)); +return qemu_fdt_setprop(fdt, node_path, property, val, sizeof(val)); } -int qemu_devtree_setprop_string(void *fdt, const char *node_path, -const char *property, const char *string) +int qemu_fdt_setprop_string(void *fdt, const char *node_path, +const char *property, const char *string) { int r; @@ -179,8 +179,8 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path, return r; } -const void *qemu_devtree_getprop(void *fdt, const char *node_path, - const char *property, int *lenp) +const void *qemu_fdt_getprop(void *fdt, const char *node_path, + const char *property, int *lenp) { int len; const void *r; @@ -196,11 +196,11 @@ const void *qemu_devtree_getprop(void *fdt, const char *node_path, return r; } -uint32_t qemu_devtree_getprop_cell(void *fdt, const char *node_path, - const char *property) +uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, + const char *property) { int len; -const uint32_t *p = qemu_devtree_getprop(fdt, node_path, property, len); +const uint32_t *p = qemu_fdt_getprop(fdt, node_path, property, len); if (len != 4) { fprintf(stderr, %s: %s/%s not 4 bytes long (not a cell?)\n, __func__, node_path, property); @@ -209,7 +209,7 @@ uint32_t qemu_devtree_getprop_cell(void *fdt, const char *node_path, return be32_to_cpu(*p); } -uint32_t qemu_devtree_get_phandle(void *fdt, const char *path) +uint32_t qemu_fdt_get_phandle(void *fdt, const char *path) { uint32_t r; @@ -223,15 +223,15 @@ uint32_t qemu_devtree_get_phandle(void *fdt, const char *path) return r; } -int qemu_devtree_setprop_phandle(void *fdt, const char *node_path, - const char *property, - const char *target_node_path) +int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, + const char *property, + const char *target_node_path) { -uint32_t phandle = qemu_devtree_get_phandle(fdt, target_node_path); -return qemu_devtree_setprop_cell(fdt, node_path, property, phandle); +uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path); +return qemu_fdt_setprop_cell(fdt, node_path, property, phandle); } -uint32_t qemu_devtree_alloc_phandle(void *fdt) +uint32_t qemu_fdt_alloc_phandle(void *fdt) { static int phandle = 0x0; @@ -255,7 +255,7 @@ uint32_t qemu_devtree_alloc_phandle(void *fdt) return phandle++; } -int qemu_devtree_nop_node(void *fdt, const char *node_path) +int qemu_fdt_nop_node(void *fdt, const char *node_path) { int r; @@ -269,7 +269,7 @@ int
[Qemu-devel] [PATCH v2 2/3] device_tree: qemu_fdt_setprop: Rename val_array arg
From: Peter Crosthwaite peter.crosthwa...@xilinx.com Looking at the implementation, this doesn't really have a lot to do with arrays. Its just a pointer to a buffer and is passed through to the wrapped fn (qemu_fdt_setprop) unchanged. So rename to make it consistent with libfdt, which in the wrapped function just calls it val. Signed-off-by: Peter Crosthwaite peter.crosthwa...@xilinx.com --- device_tree.c| 4 ++-- include/sysemu/device_tree.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/device_tree.c b/device_tree.c index 5a13d41..1e6bbad 100644 --- a/device_tree.c +++ b/device_tree.c @@ -128,11 +128,11 @@ static int findnode_nofail(void *fdt, const char *node_path) } int qemu_fdt_setprop(void *fdt, const char *node_path, - const char *property, const void *val_array, int size) + const char *property, const void *val, int size) { int r; -r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val_array, size); +r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size); if (r 0) { fprintf(stderr, %s: Couldn't set %s/%s: %s\n, __func__, node_path, property, fdt_strerror(r)); diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index 68da97a..899f05c 100644 --- a/include/sysemu/device_tree.h +++ b/include/sysemu/device_tree.h @@ -18,7 +18,7 @@ void *create_device_tree(int *sizep); void *load_device_tree(const char *filename_path, int *sizep); int qemu_fdt_setprop(void *fdt, const char *node_path, - const char *property, const void *val_array, int size); + const char *property, const void *val, int size); int qemu_fdt_setprop_cell(void *fdt, const char *node_path, const char *property, uint32_t val); int qemu_fdt_setprop_u64(void *fdt, const char *node_path, -- 1.8.3.rc1.44.gb387c77.dirty
[Qemu-devel] [PATCH v2 3/3] device_tree: qemu_fdt_setprop: Fixup error reporting
From: Peter Crosthwaite peter.crosthwa...@xilinx.com There are a mix of usages of the qemu_fdt_* API calls, some which wish to assert and abort QEMU on failure and some of which wish to do their own error handling. The latter in more correct, but the former is in the majority and more pragmatic. However the asserting clients are usually doing large numbers fdt ops and only want to assert if any one of them breaks. So the cleanest compromising solution is: 1. Require an Error ** to be passes to all fdt ops. 2. And perform no action if its already non-null (error condition). 3. If no Error ** is given report errors to stderr Step one allows clients to do their own error handling if they wish too, which is the most flexible: Error *err = NULL; fdt_foo( ... , err); if (err) { /* handle error my special way */ } Step two allows you to do a large number of fdt ops then check them all only the once at the end: Error *err = NULL; fdt_foo( ... , err); fdt_bar( ... , err); fdt_baz( ... , err); if (err) { /* First encountered error will be in err */ } Step 3, handles the common case where the error is not an issue, but just needs to make noise at the user (via stderr). This seems common for bootloader code that sets chosen and initrd props etc (rather than machine description). All error reporting is stylistically udpated to use Error ** instead of integer return codes and no more exit(1) on failures. Signed-off-by: Peter Crosthwaite peter.crosthwa...@xilinx.com --- device_tree.c| 222 --- hw/arm/boot.c| 49 -- hw/arm/vexpress.c| 23 ++--- hw/microblaze/boot.c | 13 +-- hw/ppc/e500.c| 214 - hw/ppc/e500.h| 3 +- hw/ppc/e500plat.c| 9 +- hw/ppc/mpc8544ds.c | 9 +- hw/ppc/ppc440_bamboo.c | 30 ++ hw/ppc/spapr_rtas.c | 40 ++-- hw/ppc/virtex_ml507.c| 5 +- include/sysemu/device_tree.h | 193 ++--- 12 files changed, 483 insertions(+), 327 deletions(-) diff --git a/device_tree.c b/device_tree.c index 1e6bbad..525cdaa 100644 --- a/device_tree.c +++ b/device_tree.c @@ -4,8 +4,10 @@ * interface. * * Copyright 2008 IBM Corporation. + * Copyright 2013 Xilinx Inc. * Authors: Jerone Young jyou...@us.ibm.com * Hollis Blanchard holl...@us.ibm.com + * Peter Crosthwaite peter.crosthwa...@xilinx.com * * This work is licensed under the GNU GPL license version 2 or later. * @@ -113,122 +115,176 @@ fail: return NULL; } -static int findnode_nofail(void *fdt, const char *node_path) +#define error_set_or_print(errp, ...) \ +do {\ +if (errp) { \ +error_setg(errp, __VA_ARGS__); \ +} else {\ +fprintf(stderr, __VA_ARGS__); \ +} \ +} while (0) + +static int findnode(void *fdt, const char *node_path, Error **errp) { int offset; offset = fdt_path_offset(fdt, node_path); if (offset 0) { -fprintf(stderr, %s Couldn't find node %s: %s\n, __func__, node_path, -fdt_strerror(offset)); -exit(1); +error_set_or_print(errp, %s Couldn't find node %s: %s\n, __func__, + node_path, fdt_strerror(offset)); } return offset; } -int qemu_fdt_setprop(void *fdt, const char *node_path, - const char *property, const void *val, int size) +void qemu_fdt_setprop(void *fdt, const char *node_path, const char *property, + const void *val, int size, Error **errp) { int r; -r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size); -if (r 0) { -fprintf(stderr, %s: Couldn't set %s/%s: %s\n, __func__, node_path, -property, fdt_strerror(r)); -exit(1); +if (errp *errp) { +return; } -return r; +r = findnode(fdt, node_path, errp); +if (r 0) { +return; +} +r = fdt_setprop(fdt, r, property, val, size); +if (r 0) { +error_set_or_print(errp, %s: Couldn't set %s/%s: %s\n, __func__, + node_path, property, fdt_strerror(r)); +} } -int qemu_fdt_setprop_cell(void *fdt, const char *node_path, - const char *property, uint32_t val) +void qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val, Error **errp) { int r; -r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val); -if (r 0) { -
Re: [Qemu-devel] [PATCH 1/7] usb: remove old usb-host code
On Fr, 2013-11-08 at 17:51 +0100, Jan Kiszka wrote: On 2013-11-08 16:39, Gerd Hoffmann wrote: Hi, OK, then here is the first issue I ran into while trying libusbx (git head, i.e. 1.0.17+: The new stack causes significant latency issues that makes it almost unusable for pass-through of USB audio devices (some headset in my case). Reverting usb-linux and disabling libusb over QEMU git head makes things work again. I'll have to stick with this for now as it is affecting my work environment. Any spontaneous ideas how to analyse or even resolve this? Try setting isobsize property to something smaller than 32 (which is the default). OK, isobsize=2 and isobufs=32 helped, possibly other combinations as well - but not just reducing isobsize or increasing isobufs. Any theory about this? How can we find better defaults? isobsize is the size of a single buffer (in MaxPacketSize units). isobufs is the number of buffers in the ring. So the total ring buffer size is MaxPacketSize * isobsize * isobufs. isobsize basically trades overhead for latency. Larger numbers reduce the overhead, smaller numbers reduce latency. isobufs should be as small as possible. Start with 4 (default). If you get overruns/underruns increase. We should probably look at the endpoint interval, then calculate how many packets we should expect within a certain time range and use that as additional factor for the buffer size. That should get the defaults closer to the actual needs of the device. cheers, Gerd
Re: [Qemu-devel] Buildbot failures (MinGW)
On So, 2013-11-10 at 19:25 +0100, Stefan Weil wrote: Hello Gerd, could you please install bison and flex on the buildbot machine for default_mingw? Done. cheers, Gerd
Re: [Qemu-devel] [RFC PATCH] i386: Add _PXM method to ACPI CPU objects
On Sun, 10 Nov 2013 12:36:29 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Fri, Nov 08, 2013 at 06:33:12PM +0100, Igor Mammedov wrote: On Fri, 8 Nov 2013 12:22:12 +0200 Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com wrote: Hi, On Thu, Nov 07, 2013 at 03:03:42PM +0200, Michael S. Tsirkin wrote: On Thu, Nov 07, 2013 at 01:41:59PM +0100, Vasilis Liaskovitis wrote: This patch adds a _PXM method to ACPI CPU objects for the pc machine. The _PXM value is derived from the passed in guest info, same way as CPU SRAT entries. The motivation for this patch is a CPU hot-unplug/hot-plug bug observed when using a 3.11 linux guest kernel on a multi-NUMA node qemu/kvm VM. The linux guest kernel parses the SRAT CPU entries at boot time and stores them in the array __apicid_to_node. When a CPU is hot-removed, the linux guest kernel resets the removed CPU's __apicid_to_node entry to NO_NUMA_NODE (kernel commit c4c60524). When the removed cpu is hot-added again, the linux kernel looks up the hot-added cpu object's _PXM method instead of somehow re-discovering the SRAT entry info. With current qemu/seabios, the _PXM method is not found, and the CPU is thus hot-plugged in the default NUMA node 0. (The problem does not show up on initial hotplug of a cpu; the PXM method is still not found in this case, but the kernel still has the correct proximity value from the CPU's SRAT entry stored in __apicid_to_node) ACPI spec mentions that the _PXM method is the correct way to determine proximity information at hot-add time. Where does it say this? I found this: If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a dynamically added processor is not present in the System Resource Affinity Table (SRAT), a _PXM object must exist for the processor’s device or one of its ancestors in the ACPI Namespace. Does this mean that linux is buggy, and should be fixed up to look up the apic ID in SRAT? The quote above suggests that if SRAT is absent, _PXM should be present. Seabios/qemu provide SRAT entries, and no _PXM. The fact that the kernel resets the parse SRAT info on hot-remove time looks like a kernel problem. But As Toshi Kani mentioned in the original thread, here is a quote from ACPI 5.0, stating _PXM and only _PXM should be used at hot-plug time: === 17.2.1 System Resource Affinity Table Definition This optional System Resource Affinity Table (SRAT) provides the boot time description of the processor and memory ranges belonging to a system locality. OSPM will consume the SRAT only at boot time. OSPM should use _PXM for any devices that are hot-added into the system after boot up. So in this sense, the kernel is correct (kernel only uses _PXM at hot-plug time) , and qemu/Seabios should have _PXM methods for hot operations. in terms of RFC SHOULD doesn't mean MUST, and in my interpretation of above is that SRAT parsed once but it doesn't mean that OS should forget data from it. Well it says OSPM will consume the SRAT only at boot time. How do you interpret that? Exactly as I've wrote before. In the same chapter spec makes provision that hotplug memory entries could be in SRAT as well, which rules out using table only at boot time. But looking at APIC entry it doesn't have flag that marks it as hotplugable opposed to memory entry, so maybe we are doing useless work putting not present CPUs into it. Anyway we surely can have both in QEMU. So far, qemu/seabios do not provide this method for CPUs. So regardless of kernel behaviour, it is a good idea to add this _PXM method. Since ACPI table generation has recently been moved from seabios to qemu, we do this in qemu. Note that the above hot-remove/hot-add scenario has been tested on an older qemu + non-upstreamed patches for cpu hot-removal support, and not on qemu master (since cpu-del support is still not on master). The only testing done with qemu/seabios master and this patch, are successful boots of multi-node linux and windows8 guests. For the initial discussion on seabios and linux-acpi lists see http://www.spinics.net/lists/linux-acpi/msg47058.html Signed-off-by: Vasilis Liaskovitis vasilis.liaskovi...@profitbricks.com Reviewed-by: Thilo Fromm t...@thilo-fromm.de Even if this is a linux bug, I have no issue with working around it in qemu. But I think proper testing needs to be done with rebased upport for cpu-del. Ok, I can try to rebase cpu-del support for testing. If there are cpu-del bits already somewhere (Igor?) and not merged yet, please point me to
Re: [Qemu-devel] Buildbot failures (XEN)
On Sun, Nov 10, 2013 at 05:07:03PM +0100, Stefan Weil wrote: could you please have a look at these buildbots (maybe others, too): xen_i386_debian_6_0 xen_x86_64_debian_6_0 Both fail during the configuration: ERROR: unknown option --disable-debug-info fetching branch xen-next from git://repo.or.cz/qemu/agraf.git Alex: Is this xen tree still relevant or should I sent a buildbot config patch to drop it? Stefan
Re: [Qemu-devel] [PATCH] ui/sdl2 : initial port to SDL 2.0
On Mon, Nov 11, 2013 at 2:02 PM, Anthony Liguori anth...@codemonkey.ws wrote: On Sun, Nov 10, 2013 at 3:15 PM, Dave Airlie airl...@gmail.com wrote: From: Dave Airlie airl...@redhat.com I've ported the SDL1.2 code over, and rewritten it to use the SDL2 interface. The biggest changes were in the input handling, where SDL2 has done a major overhaul, and I've had to include a generated translation file to get from SDL2 codes back to qemu compatible ones. I'm still not sure how the keyboard layout code works in qemu, so there may be further work if someone can point me a test case that works with SDL1.2 and doesn't with SDL2. Some SDL env vars we used to set are no longer used by SDL2, Windows, OSX support is untested, I don't think we can link to SDL1.2 and SDL2 at the same time, so I felt using --with-sdlabi=2.0 to select the new code should be fine, like how gtk does it. Signed-off-by: Dave Airlie airl...@redhat.com --- configure| 23 +- ui/Makefile.objs | 4 +- ui/sdl.c | 3 + ui/sdl2.c| 889 +++ Can we refactor this to not duplicate everything and instead have function hooks or even #ifdefs for the things that are different? We try to guess the right SDL to use in configure. See how we handle GTK2 vs. GTK3. It's very hard to review ATM due to the split. No I talked to enough people at kvmforum and everyone said I should split this into a separate file, please don't make me undo that now, I originally did it with ifdefs and just spent a few days redoing it the other way! Regarding the keycodes, danpb has a great write up on his blog: https://www.berrange.com/posts/2010/07/04/a-summary-of-scan-code-key-codes-sets-used-in-the-pc-virtualization-stack/ Okay I'll read that later, Internally, we use a variant of raw XT scancodes. We have a keymapping routine that translates from a symbolic key (i.e. capital A) to the appropriate XT scancode. SDL 1.x at least lets you get at raw X11 scancodes which are either evdev or PS/2 codes depending on the version of X11. So for SDL 1.x we have two translations mechanisms 1) X11 scancodes to XT scancodes and 2) SDL keysyms to internal QEMU keysyms. From what I can tell, SDL2 has moved from just passing through raw X11 scancodes (which like I said, can be different depending on your X configuration) to having an intermediate translation layer. See comments inline: ui/sdl2_scancode_translate.h | 260 + ui/sdl_keysym.h | 3 +- 6 files changed, 1175 insertions(+), 7 deletions(-) create mode 100644 ui/sdl2.c create mode 100644 ui/sdl2_scancode_translate.h diff --git a/configure b/configure index 9addff1..bf3be37 100755 --- a/configure +++ b/configure @@ -158,6 +158,7 @@ docs= fdt= pixman= sdl= +sdlabi=1.2 virtfs= vnc=yes sparse=no @@ -310,6 +311,7 @@ query_pkg_config() { } pkg_config=query_pkg_config sdl_config=${SDL_CONFIG-${cross_prefix}sdl-config} +sdl2_config=${SDL2_CONFIG-${cross_prefix}sdl2-config} # default flags for all hosts QEMU_CFLAGS=-fno-strict-aliasing $QEMU_CFLAGS @@ -710,6 +712,8 @@ for opt do ;; --enable-sdl) sdl=yes ;; + --with-sdlabi=*) sdlabi=$optarg + ;; --disable-qom-cast-debug) qom_cast_debug=no ;; --enable-qom-cast-debug) qom_cast_debug=yes @@ -1092,6 +1096,7 @@ echo --disable-strip disable stripping binaries echo --disable-werror disable compilation abort on warning echo --disable-sdldisable SDL echo --enable-sdl enable SDL +echo --with-sdlabiselect preferred SDL ABI 1.2 or 2.0 echo --disable-gtkdisable gtk UI echo --enable-gtk enable gtk UI echo --disable-virtfs disable VirtFS @@ -1751,12 +1756,22 @@ fi # Look for sdl configuration program (pkg-config or sdl-config). Try # sdl-config even without cross prefix, and favour pkg-config over sdl-config. -if test `basename $sdl_config` != sdl-config ! has ${sdl_config}; then - sdl_config=sdl-config + +if test $sdlabi == 2.0; then +sdl_config=$sdl2_config +sdlname=sdl2 +sdlconfigname=sdl2_config +else +sdlname=sdl +sdlconfigname=sdl_config +fi + +if test `basename $sdl_config` != $sdlconfigname ! has ${sdl_config}; then + sdl_config=$sdlconfigname fi -if $pkg_config sdl --exists; then - sdlconfig=$pkg_config sdl +if $pkg_config $sdlname --exists; then + sdlconfig=$pkg_config $sdlname _sdlversion=`$sdlconfig --modversion 2/dev/null | sed 's/[^0-9]//g'` elif has ${sdl_config}; then sdlconfig=$sdl_config diff --git a/ui/Makefile.objs b/ui/Makefile.objs index f33be47..721ad37 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -9,12 +9,12 @@ vnc-obj-y += vnc-jobs.o common-obj-y += keymaps.o console.o cursor.o input.o qemu-pixman.o
Re: [Qemu-devel] [PATCH] console: Remove unused debug code
On So, 2013-11-10 at 15:58 +0100, Stefan Weil wrote: The local function console_print_text_attributes is no longer used since commit 7d6ba01c3741bc32ae252bf64a5fd3f930c2df4f. Reviewed-by: Gerd Hoffmann kra...@redhat.com cheers, Gerd
Re: [Qemu-devel] [PATCH] console: Replace conditional debug messages by trace methods
On So, 2013-11-10 at 16:04 +0100, Stefan Weil wrote: +console_putchar_csi(int esc_param0, int esc_param1, int ch, int nb_esc_params) escape sequence CSI%d;%d%c, %d parameters +console_putchar_unhandled(int ch) unhandled escape character '%c' Reviewed-by: Gerd Hoffmann kra...@redhat.com cheers, Gerd
Re: [Qemu-devel] [PATCH v2] e1000: initial link negotiation on mac osx
On Fri, Nov 08, 2013 at 10:52:09AM -0500, Gabriel L. Somlo wrote: On Fri, Nov 08, 2013 at 02:39:25PM +0100, Stefan Hajnoczi wrote: On Fri, Nov 08, 2013 at 12:12:52AM +0100, Alexander Graf wrote: We can easily modify SeaBIOS to just loop through all PCI devices, look for an e1000 and initialize it far enough for XNU, no? After all, it sounds like that's closer to the way a real Mac works. I'd much prefer Alex's suggestion so we avoid putting guest-specific hacks into QEMU. If there is really no better solution, please make an extra behavior disabled by default and accessible through a device property. For example -device e1000,xnu-preinit-hack=on. I agree too, in principle. OTOH I'm a bit worried that teaching SeaBIOS about e1000, and then getting that change upstreamed there might be a whole different size of problem to solve :) I will however give that a shot first, and fall back to xnu-preinit-hack=on only if that doesn't work out... The other approach is to look at iPXE, the PXE boot ROM that QEMU ships for the e1000 NIC. It has an e1000 driver and you might find a hack to get things working: Either see if you can chainload the bootloader on the harddisk after having initialized the e1000 in iPXE. (Start a network boot but then use the 'boot' or 'chain' commands in iPXE.) Or consider adding code to pre-initialize the e1000 to iPXE. Whether that hack will be accepted by the iPXE community is a different question but this still pushes the hack into the guest firmware - closer to where it lives on the real hardware. Stefan
Re: [Qemu-devel] [PATCH for-1.7 v2] block: Print its file name if backing file opening failed
On Fri, Nov 08, 2013 at 11:26:49AM +0800, Fam Zheng wrote: If backing file doesn't exist, the error message is confusing and misleading: $ qemu /tmp/a.qcow2 qemu: could not open disk image /tmp/a.qcow2: Could not open file: No such file or directory But... $ ls /tmp/a.qcow2 /tmp/a.qcow2 $ qemu-img info /tmp/a.qcow2 image: /tmp/a.qcow2 file format: qcow2 virtual size: 8.0G (8589934592 bytes) disk size: 196K cluster_size: 65536 backing file: /tmp/b.qcow2 Because... $ ls /tmp/b.qcow2 ls: cannot access /tmp/b.qcow2: No such file or directory This is not intuitive. It's better to have the missing file's name in the error message. With this patch: $ qemu-io -c 'read 0 512' /tmp/a.qcow2 qemu-io: can't open device /tmp/a.qcow2: Could not open backing file: Could not open '/stor/vm/arch.raw': No such file or directory no file open, try 'help open' Which is a little bit better. Signed-off-by: Fam Zheng f...@redhat.com --- v2: Don't leak local_err (Eric). Signed-off-by: Fam Zheng f...@redhat.com --- block.c| 4 +++- block/raw-posix.c | 1 - block/raw-win32.c | 1 - tests/qemu-iotests/051.out | 2 +- tests/qemu-iotests/069.out | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
2013/11/5 Stefan Hajnoczi stefa...@gmail.com: On Mon, Nov 04, 2013 at 11:28:41AM +0100, Matthias Brugger wrote: v2: - fix issues found by checkpatch.pl - change the descritpion of patch 3 This patch series makes the thread pool implementation modular. This allows each drive to use a special implementation. The patch series prepares qemu to be able to include thread pools different to the one actually implemented. It will allow to implement approaches like paravirtualized block requests [1]. [1] http://www.linux-kvm.org/wiki/images/5/53/2012-forum-Brugger-lightningtalk.pdf -drive aio=threads|native already distinguishes between different AIO implementations. IMO -drive aio= is the logical interface if you want to add a new AIO mechanism. Good point. I'd also like to see the thread pool implementation you wish to add before we add a layer of indirection which has no users yet. Fair enough, I will evaluate if it will make more sense to implement a new AIO infrastructure instead to try reuse the thread-pool. Actually my implementation will differ in the way, that we will have several workerthreads with everyone of them having its own queue. The requests will be distributed between them depending on an identifier. The request function which the worker_thread call will be the same as using aio=threads, so I'm not quite sure which will be the way to go. Any opinions and hints, like the one you gave are highly appreciated. -- motzblog.wordpress.com
Re: [Qemu-devel] [PATCH 2/2] exec: make address spaces 64-bit wide
Il 10/11/2013 11:31, Michael S. Tsirkin ha scritto: Reported-by: Luiz Capitulino lcapitul...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com So this causes a 12% performance regression on some TCG tests, I think we should look into a smarter datastructure to solve the issues. It causes a 12% performance regression in a single testcase where KVM has a 150x performance regression. This says a lot about the relevance of the testcase. In any case, I have patches to avoid the regression. For 1.7 we can just revert the patches, for 1.8 we can apply this patch together with the optimizations that avoid introducing a regression. Paolo
Re: [Qemu-devel] [PATCH v2] block: per caller dirty bitmap
On Mon, Nov 04, 2013 at 11:55:47AM +0100, Paolo Bonzini wrote: Il 04/11/2013 11:47, Fam Zheng ha scritto: -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity); -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi); -int64_t bdrv_get_dirty_count(BlockDriverState *bs); +void bdrv_dirty_iter_init(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); You do not really need the BDS argument to the functions, do you? (Or do you have other plans?) I just wanted to keep the pattern of those bdrv_* family, no other plans for it. Kevin, Stefan, any second opinions? I was thinking the same thing and arrived at the conclusion that since it's bdrv_*() and not bbitmap_*() we should keep the explicit BDS argument. Stefan
Re: [Qemu-devel] [PATCH v2] block: per caller dirty bitmap
On Mon, Nov 04, 2013 at 05:30:10PM +0800, Fam Zheng wrote: @@ -2785,9 +2792,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, ret = bdrv_co_flush(bs); } -if (bs-dirty_bitmap) { bdrv_set_dirty(bs, sector_num, nb_sectors); -} Forgot to reduce indentation?
Re: [Qemu-devel] [PATCH v2] block: per caller dirty bitmap
On Mon, Nov 04, 2013 at 05:30:10PM +0800, Fam Zheng wrote: Previously a BlockDriverState has only one dirty bitmap, so only one caller (e.g. a block job) can keep track of writing. This changes the dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the lifecycle is managed with these new functions: bdrv_create_dirty_bitmap bdrv_release_dirty_bitmap Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap. In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument is added to these functions, since each caller has its own dirty bitmap: bdrv_get_dirty bdrv_dirty_iter_init bdrv_get_dirty_count bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will internally walk the list of all dirty bitmaps and set them one by one. Signed-off-by: Fam Zheng f...@redhat.com --- v2: Added BdrvDirtyBitmap [Paolo] Signed-off-by: Fam Zheng f...@redhat.com --- block-migration.c | 22 + block.c | 81 --- block/mirror.c| 23 -- block/qapi.c | 8 - include/block/block.h | 11 --- include/block/block_int.h | 2 +- 6 files changed, 85 insertions(+), 62 deletions(-) Happy with this modulo the indentation fixup I commented on. Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
Re: [Qemu-devel] [PATCH] ui/sdl2 : initial port to SDL 2.0
Il 11/11/2013 10:10, Dave Airlie ha scritto: It's very hard to review ATM due to the split. No I talked to enough people at kvmforum and everyone said I should split this into a separate file, please don't make me undo that now, I originally did it with ifdefs and just spent a few days redoing it the other way! You can try using options for git format-patch like -C10 --find-copies-harder. Paolo
Re: [Qemu-devel] [PATCH v3 5/6] bitops: add BITNR macro
Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This adds a macro to calculate the highest bit set. Isn't that already available as ffs / clz GCC builtin with wrapper in qemu/bitops.h? What's the difference to your macro? CC'ing Paolo. Andreas If used on constant values, no code will be generated. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- include/qemu/bitops.h | 12 1 file changed, 12 insertions(+) diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 304c90c..98ba42a 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -23,6 +23,18 @@ #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BITS_TO_LONGS(nr)DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define __BITNR(m, n) ((m) == ((m) (1(n ? (n) : +#define BITNR(m) \ +__BITNR((m), 31) __BITNR((m), 30) __BITNR((m), 29) __BITNR((m), 28) \ +__BITNR((m), 27) __BITNR((m), 26) __BITNR((m), 25) __BITNR((m), 24) \ +__BITNR((m), 23) __BITNR((m), 22) __BITNR((m), 21) __BITNR((m), 20) \ +__BITNR((m), 19) __BITNR((m), 18) __BITNR((m), 17) __BITNR((m), 16) \ +__BITNR((m), 15) __BITNR((m), 14) __BITNR((m), 13) __BITNR((m), 12) \ +__BITNR((m), 11) __BITNR((m), 10) __BITNR((m), 9) __BITNR((m), 8) \ +__BITNR((m), 7) __BITNR((m), 6) __BITNR((m), 5) __BITNR((m), 4) \ +__BITNR((m), 3) __BITNR((m), 2) __BITNR((m), 1) __BITNR((m), 0) \ +-1 + /** * set_bit - Set a bit in memory * @nr: the bit to set -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] [PATCH v3 5/6] bitops: add BITNR macro
On 11/11/2013 10:57 PM, Andreas Färber wrote: Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This adds a macro to calculate the highest bit set. Isn't that already available as ffs / clz GCC builtin with wrapper in qemu/bitops.h? What's the difference to your macro? CC'ing Paolo. I am ignorant and did not know this simple fact. Sorry. Drop this patch. Andreas If used on constant values, no code will be generated. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- include/qemu/bitops.h | 12 1 file changed, 12 insertions(+) diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 304c90c..98ba42a 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -23,6 +23,18 @@ #define BIT_WORD(nr)((nr) / BITS_PER_LONG) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define __BITNR(m, n) ((m) == ((m) (1(n ? (n) : +#define BITNR(m) \ +__BITNR((m), 31) __BITNR((m), 30) __BITNR((m), 29) __BITNR((m), 28) \ +__BITNR((m), 27) __BITNR((m), 26) __BITNR((m), 25) __BITNR((m), 24) \ +__BITNR((m), 23) __BITNR((m), 22) __BITNR((m), 21) __BITNR((m), 20) \ +__BITNR((m), 19) __BITNR((m), 18) __BITNR((m), 17) __BITNR((m), 16) \ +__BITNR((m), 15) __BITNR((m), 14) __BITNR((m), 13) __BITNR((m), 12) \ +__BITNR((m), 11) __BITNR((m), 10) __BITNR((m), 9) __BITNR((m), 8) \ +__BITNR((m), 7) __BITNR((m), 6) __BITNR((m), 5) __BITNR((m), 4) \ +__BITNR((m), 3) __BITNR((m), 2) __BITNR((m), 1) __BITNR((m), 0) \ +-1 + /** * set_bit - Set a bit in memory * @nr: the bit to set -- Alexey
Re: [Qemu-devel] [PATCH v2 1/3] Make thread pool implementation modular
Am 04.11.2013 um 11:28 hat Matthias Brugger geschrieben: This patch introduces function pointers for the thread pool, so that it's implementation can be set at run-time. Signed-off-by: Matthias Brugger matthias@gmail.com Like Stefan said, this can really only be given a meaningful review in context with an actual user of the infrastructure, but I'll mention some minor points in this series anyway. include/block/thread-pool.h | 11 +++ thread-pool.c | 33 + 2 files changed, 44 insertions(+) diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h index 32afcdd..1f73712 100644 --- a/include/block/thread-pool.h +++ b/include/block/thread-pool.h @@ -38,4 +38,15 @@ int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func, void *arg); void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); +ThreadPoolFuncArr *thread_pool_probe(void); +void thread_pool_delete(ThreadPoolFuncArr *tpf); + +struct ThreadPoolFuncArr { If Arr is supposed to mean array, this isn't really one. Similar structs containing function pointers have names ending in Info (AIOCBInfo, NetClientInfo) or Driver (BlockDriver). +BlockDriverAIOCB *(*thread_pool_submit_aio)(ThreadPool *pool, +ThreadPoolFunc *func, void *arg, BlockDriverCompletionFunc *cb, +void *opaque); +ThreadPool *(*thread_pool_new)(AioContext *ctx); +}; + + #endif diff --git a/thread-pool.c b/thread-pool.c index 3735fd3..53294a9 100644 --- a/thread-pool.c +++ b/thread-pool.c @@ -26,6 +26,7 @@ #include qemu/main-loop.h static void do_spawn_thread(ThreadPool *pool); +static void thread_pool_aio_free(ThreadPool *pool); typedef struct ThreadPoolElement ThreadPoolElement; @@ -77,6 +78,7 @@ struct ThreadPool { int pending_threads; /* threads created but not running yet */ int pending_cancellations; /* whether we need a cond_broadcast */ bool stopping; +void (*thread_pool_free)(ThreadPool *pool); }; static void *worker_thread(void *opaque) @@ -300,6 +302,7 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) qemu_sem_init(pool-sem, 0); pool-max_threads = 64; pool-new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); +pool-thread_pool_free = thread_pool_aio_free; QLIST_INIT(pool-head); QTAILQ_INIT(pool-request_list); @@ -316,6 +319,11 @@ ThreadPool *thread_pool_new(AioContext *ctx) void thread_pool_free(ThreadPool *pool) { +pool-thread_pool_free(pool); +} + +void thread_pool_aio_free(ThreadPool *pool) +{ if (!pool) { return; } @@ -346,3 +354,28 @@ void thread_pool_free(ThreadPool *pool) event_notifier_cleanup(pool-notifier); g_free(pool); } + +ThreadPoolFuncArr *thread_pool_probe(void) What is probed here? Isn't it just a function that creates a thread pool? +{ +ThreadPoolFuncArr *tpf_pool = NULL; + +if (tpf_pool) { +return tpf_pool; +} + +tpf_pool = g_new(ThreadPoolFuncArr, 1); +if (!tpf_pool) { +printf(error allocating thread pool\n); +return NULL; +} g_new() doesn't fail. + +tpf_pool-thread_pool_submit_aio = thread_pool_submit_aio; +tpf_pool-thread_pool_new = thread_pool_new; + +return tpf_pool; +} + +void thread_pool_delete(ThreadPoolFuncArr *tpf) +{ +g_free(tpf); +} Kevin
Re: [Qemu-devel] [PATCH v2 3/3] Add workerthreads configuration option
Am 04.11.2013 um 11:28 hat Matthias Brugger geschrieben: This patch allows to choose at the command line level which thread pool implementation will be used by every block device. Signed-off-by: Matthias Brugger matthias@gmail.com This would require a change to blockdev-add in qapi-schema.json in order to make the option available using QMP. Kevin
Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments
Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This converts +foo/-foo to foo=on/foo=off respectively when QEMU parser is used for the command line options. -cpu parsers in x86 and other architectures should be unaffected by this change. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- util/qemu-option.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/util/qemu-option.c b/util/qemu-option.c index efcb5dc..6c8667c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char *params, if (strncmp(option, no, 2) == 0) { memmove(option, option+2, strlen(option+2)+1); pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, -, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, +, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), on); } else { pstrcpy(value, sizeof(value), on); } This looks like an interesting idea! However this is much too big a change to just CC ppc folks on... Jan, I wonder if this might break slirp's hostfwd option? Not sure what other options potentially starting with '-' might be affected. Test cases would be a helpful way of demonstrating that this change does not have undesired side effects. Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
On Mon, Nov 11, 2013 at 11:00:45AM +0100, Matthias Brugger wrote: 2013/11/5 Stefan Hajnoczi stefa...@gmail.com: I'd also like to see the thread pool implementation you wish to add before we add a layer of indirection which has no users yet. Fair enough, I will evaluate if it will make more sense to implement a new AIO infrastructure instead to try reuse the thread-pool. Actually my implementation will differ in the way, that we will have several workerthreads with everyone of them having its own queue. The requests will be distributed between them depending on an identifier. The request function which the worker_thread call will be the same as using aio=threads, so I'm not quite sure which will be the way to go. Any opinions and hints, like the one you gave are highly appreciated. If I understand the slides you linked to correctly, the guest will pass an identifier with each request. The host has worker threads allowing each stream of requests to be serviced independently. The idea is to associate guest processes with unique identifiers. The guest I/O scheduler is supposed to submit requests in a way that meets certain policies (e.g. fairness between processes, deadlines, etc). Why is it necessary to push this task down into the host? I don't understand the advantage of this approach except that maybe it works around certain misconfigurations, I/O scheduler quirks, or plain old bugs - all of which should be investigated and fixed at the source instead of adding another layer of code to mask them. Stefan
Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments
On 2013-11-11 13:41, Andreas Färber wrote: Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This converts +foo/-foo to foo=on/foo=off respectively when QEMU parser is used for the command line options. -cpu parsers in x86 and other architectures should be unaffected by this change. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- util/qemu-option.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/util/qemu-option.c b/util/qemu-option.c index efcb5dc..6c8667c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char *params, if (strncmp(option, no, 2) == 0) { memmove(option, option+2, strlen(option+2)+1); pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, -, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, +, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), on); } else { pstrcpy(value, sizeof(value), on); } This looks like an interesting idea! However this is much too big a change to just CC ppc folks on... Jan, I wonder if this might break slirp's hostfwd option? hostfwd starts with : in the simplest case - or what pattern do you have in mind? Jan Not sure what other options potentially starting with '-' might be affected. Test cases would be a helpful way of demonstrating that this change does not have undesired side effects. Regards, Andreas -- Siemens AG, Corporate Technology, CT RTC ITP SES-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard
Am 24.10.2013 um 12:06 hat Peter Lieven geschrieben: Reviewed-by: Eric Blake ebl...@redhat.com Signed-off-by: Peter Lieven p...@kamp.de --- block.c | 37 - 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 0c0b0ac..b28dd42 100644 --- a/block.c +++ b/block.c @@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors); } +/* if no limit is specified in the BlockLimits use a default + * of 32768 512-byte sectors (16 MiB) per request. + */ +#define MAX_DISCARD_DEFAULT 32768 + int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } if (bs-drv-bdrv_co_discard) { -return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors); +int max_discard = bs-bl.max_discard ? + bs-bl.max_discard : MAX_DISCARD_DEFAULT; + +while (nb_sectors 0) { +int ret; +int num = nb_sectors; + +/* align request */ +if (bs-bl.discard_alignment +num = bs-bl.discard_alignment +sector_num % bs-bl.discard_alignment) { +if (num bs-bl.discard_alignment) { +num = bs-bl.discard_alignment; +} +num -= sector_num % bs-bl.discard_alignment; +} + +/* limit request size */ +if (num max_discard) { +num = max_discard; +} + +ret = bs-drv-bdrv_co_discard(bs, sector_num, num); +if (ret) { +return ret; +} + +sector_num += num; +nb_sectors -= num; +} +return 0; } else if (bs-drv-bdrv_aio_discard) { BlockDriverAIOCB *acb; CoroutineIOCompletion co = { You're only optimising drivers which have a .bdrv_co_discard() implementation, but not those with .bdrv_aio_discard(). Not very nice, and it would have been easy to avoid this by putting the loop around the whole if statement instead of inside the 'then' branch. Kevin
Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments
Am 11.11.2013 13:52, schrieb Jan Kiszka: On 2013-11-11 13:41, Andreas Färber wrote: Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This converts +foo/-foo to foo=on/foo=off respectively when QEMU parser is used for the command line options. -cpu parsers in x86 and other architectures should be unaffected by this change. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- util/qemu-option.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/util/qemu-option.c b/util/qemu-option.c index efcb5dc..6c8667c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char *params, if (strncmp(option, no, 2) == 0) { memmove(option, option+2, strlen(option+2)+1); pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, -, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, +, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), on); } else { pstrcpy(value, sizeof(value), on); } This looks like an interesting idea! However this is much too big a change to just CC ppc folks on... Jan, I wonder if this might break slirp's hostfwd option? hostfwd starts with : in the simplest case - or what pattern do you have in mind? Ah right, I had :8022-:22 or so in mind and mixed up optional host name with optional source port. Basically I'm checking for anything which is using the generic QemuOpts parsing and where a literal + or - may lead to unexpected parsing changes with this patch. Without having looked up more context for this hunk, it should not affect foo=-bar but only where foo= is optional, such as type names for driver= starting with either character (not aware of such types though). Andreas Not sure what other options potentially starting with '-' might be affected. Test cases would be a helpful way of demonstrating that this change does not have undesired side effects. -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] [PATCH v3 6/6] target-ppc: demonstrate new vsx property
Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This patch is to demonstrate a static property handling in PowerPC. Running QEMU with -cpu host,-vsx disables VSX bit in PowerPCCPU::env::flags. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- target-ppc/translate_init.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index df0d81c..60ea235 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -29,6 +29,7 @@ #include mmu-hash64.h #include qemu/error-report.h #include qapi/visitor.h +#include hw/qdev-properties.h //#define PPC_DUMP_CPU //#define PPC_DEBUG_SPR @@ -8740,6 +8741,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) dc-fw_name = PowerPC,UNKNOWN; cc-parse_options = cpu_default_parse_options_func; + +static Property powerpc_properties[] = { +DEFINE_PROP_BIT(vsx, PowerPCCPU, env.flags, BITNR(POWERPC_FLAG_VSX), false), +DEFINE_PROP_END_OF_LIST(), +}; +dc-props = powerpc_properties; } static const TypeInfo ppc_cpu_type_info = { This type of static property looks good to me, but two problems apart from the BITNR() discussed elsewhere: 1) Won't the default value of false always disable VSX for all models including POWER7 and higher? 2) Please move the array out of the function to just before the containing function. Igor has been facing a similar issue in his refactoring of x86 models and I believe evaluated to define the properties per model so that they have different defaults. Another way is overwriting that default in the model's instance_init or via globals specific to the subtype. Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
[Qemu-devel] [PATCH] block: Fail if requested driver is not available
If an explicit driver option is present, but doesn't specify a valid driver, then bdrv_open() should fail instead of probing the format. Signed-off-by: Kevin Wolf kw...@redhat.com --- block.c| 5 + tests/qemu-iotests/051 | 7 +++ tests/qemu-iotests/051.out | 9 + 3 files changed, 21 insertions(+) diff --git a/block.c b/block.c index d7de89d..6256b09 100644 --- a/block.c +++ b/block.c @@ -1130,6 +1130,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, if (drvname) { drv = bdrv_find_format(drvname); qdict_del(options, driver); +if (!drv) { +error_setg(errp, Invalid driver: '%s', drvname); +ret = -EINVAL; +goto unlink_and_fail; +} } if (!drv) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index b1480a8..4df8058 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -78,6 +78,13 @@ run_qemu -drive file=$TEST_IMG,format=qcow2,unknown_opt=1234 run_qemu -drive file=$TEST_IMG,format=qcow2,unknown_opt=foo echo +echo === Invalid format === +echo + +run_qemu -drive file=$TEST_IMG,format=foo +run_qemu -drive file=$TEST_IMG,driver=foo + +echo echo === Overriding backing file === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 1ee010d..088735a 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -17,6 +17,15 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +=== Invalid format === + +Testing: -drive file=TEST_DIR/t.qcow2,format=foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format + +Testing: -drive file=TEST_DIR/t.qcow2,driver=foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Invalid driver: 'foo' + + === Overriding backing file === Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,backing.file.filename=TEST_DIR/t.qcow2.orig -nodefaults -- 1.8.1.4
Re: [Qemu-devel] [PATCH] ui/sdl2 : initial port to SDL 2.0
On Nov 11, 2013 1:10 AM, Dave Airlie airl...@gmail.com wrote: On Mon, Nov 11, 2013 at 2:02 PM, Anthony Liguori anth...@codemonkey.ws wrote: On Sun, Nov 10, 2013 at 3:15 PM, Dave Airlie airl...@gmail.com wrote: From: Dave Airlie airl...@redhat.com I've ported the SDL1.2 code over, and rewritten it to use the SDL2 interface. The biggest changes were in the input handling, where SDL2 has done a major overhaul, and I've had to include a generated translation file to get from SDL2 codes back to qemu compatible ones. I'm still not sure how the keyboard layout code works in qemu, so there may be further work if someone can point me a test case that works with SDL1.2 and doesn't with SDL2. Some SDL env vars we used to set are no longer used by SDL2, Windows, OSX support is untested, I don't think we can link to SDL1.2 and SDL2 at the same time, so I felt using --with-sdlabi=2.0 to select the new code should be fine, like how gtk does it. Signed-off-by: Dave Airlie airl...@redhat.com --- configure| 23 +- ui/Makefile.objs | 4 +- ui/sdl.c | 3 + ui/sdl2.c| 889 +++ Can we refactor this to not duplicate everything and instead have function hooks or even #ifdefs for the things that are different? We try to guess the right SDL to use in configure. See how we handle GTK2 vs. GTK3. It's very hard to review ATM due to the split. No I talked to enough people at kvmforum and everyone said I should split this into a separate file, please don't make me undo that now, I originally did it with ifdefs and just spent a few days redoing it the other way! Perhaps whoever you spoke with should speal up then. Forking sdl.c seems like a pretty bad idea to me. Regarding the keycodes, danpb has a great write up on his blog: https://www.berrange.com/posts/2010/07/04/a-summary-of-scan-code-key-codes-sets-used-in-the-pc-virtualization-stack/ Okay I'll read that later, Internally, we use a variant of raw XT scancodes. We have a keymapping routine that translates from a symbolic key (i.e. capital A) to the appropriate XT scancode. SDL 1.x at least lets you get at raw X11 scancodes which are either evdev or PS/2 codes depending on the version of X11. So for SDL 1.x we have two translations mechanisms 1) X11 scancodes to XT scancodes and 2) SDL keysyms to internal QEMU keysyms. From what I can tell, SDL2 has moved from just passing through raw X11 scancodes (which like I said, can be different depending on your X configuration) to having an intermediate translation layer. See comments inline: ui/sdl2_scancode_translate.h | 260 + ui/sdl_keysym.h | 3 +- 6 files changed, 1175 insertions(+), 7 deletions(-) create mode 100644 ui/sdl2.c create mode 100644 ui/sdl2_scancode_translate.h diff --git a/configure b/configure index 9addff1..bf3be37 100755 --- a/configure +++ b/configure @@ -158,6 +158,7 @@ docs= fdt= pixman= sdl= +sdlabi=1.2 virtfs= vnc=yes sparse=no @@ -310,6 +311,7 @@ query_pkg_config() { } pkg_config=query_pkg_config sdl_config=${SDL_CONFIG-${cross_prefix}sdl-config} +sdl2_config=${SDL2_CONFIG-${cross_prefix}sdl2-config} # default flags for all hosts QEMU_CFLAGS=-fno-strict-aliasing $QEMU_CFLAGS @@ -710,6 +712,8 @@ for opt do ;; --enable-sdl) sdl=yes ;; + --with-sdlabi=*) sdlabi=$optarg + ;; --disable-qom-cast-debug) qom_cast_debug=no ;; --enable-qom-cast-debug) qom_cast_debug=yes @@ -1092,6 +1096,7 @@ echo --disable-strip disable stripping binaries echo --disable-werror disable compilation abort on warning echo --disable-sdldisable SDL echo --enable-sdl enable SDL +echo --with-sdlabiselect preferred SDL ABI 1.2 or 2.0 echo --disable-gtkdisable gtk UI echo --enable-gtk enable gtk UI echo --disable-virtfs disable VirtFS @@ -1751,12 +1756,22 @@ fi # Look for sdl configuration program (pkg-config or sdl-config). Try # sdl-config even without cross prefix, and favour pkg-config over sdl-config. -if test `basename $sdl_config` != sdl-config ! has ${sdl_config}; then - sdl_config=sdl-config + +if test $sdlabi == 2.0; then +sdl_config=$sdl2_config +sdlname=sdl2 +sdlconfigname=sdl2_config +else +sdlname=sdl +sdlconfigname=sdl_config +fi + +if test `basename $sdl_config` != $sdlconfigname ! has ${sdl_config}; then + sdl_config=$sdlconfigname fi -if $pkg_config sdl --exists; then - sdlconfig=$pkg_config sdl +if $pkg_config $sdlname --exists; then + sdlconfig=$pkg_config $sdlname _sdlversion=`$sdlconfig --modversion 2/dev/null | sed 's/[^0-9]//g'` elif has ${sdl_config}; then
Re: [Qemu-devel] [PATCH] block: Fail if requested driver is not available
On Mon, Nov 11, 2013 at 02:54:59PM +0100, Kevin Wolf wrote: If an explicit driver option is present, but doesn't specify a valid driver, then bdrv_open() should fail instead of probing the format. Signed-off-by: Kevin Wolf kw...@redhat.com --- block.c| 5 + tests/qemu-iotests/051 | 7 +++ tests/qemu-iotests/051.out | 9 + 3 files changed, 21 insertions(+) diff --git a/block.c b/block.c index d7de89d..6256b09 100644 --- a/block.c +++ b/block.c @@ -1130,6 +1130,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, if (drvname) { drv = bdrv_find_format(drvname); qdict_del(options, driver); +if (!drv) { +error_setg(errp, Invalid driver: '%s', drvname); +ret = -EINVAL; +goto unlink_and_fail; +} } if (!drv) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index b1480a8..4df8058 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -78,6 +78,13 @@ run_qemu -drive file=$TEST_IMG,format=qcow2,unknown_opt=1234 run_qemu -drive file=$TEST_IMG,format=qcow2,unknown_opt=foo echo +echo === Invalid format === +echo + +run_qemu -drive file=$TEST_IMG,format=foo +run_qemu -drive file=$TEST_IMG,driver=foo + +echo echo === Overriding backing file === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 1ee010d..088735a 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -17,6 +17,15 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=qcow2,unknown_opt=foo: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device 'ide0-hd0' doesn't support the option 'unknown_opt' +=== Invalid format === + +Testing: -drive file=TEST_DIR/t.qcow2,format=foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format + +Testing: -drive file=TEST_DIR/t.qcow2,driver=foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Invalid driver: 'foo' + + === Overriding backing file === Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,backing.file.filename=TEST_DIR/t.qcow2.orig -nodefaults -- 1.8.1.4 Reviewed-by: Jeff Cody jc...@redhat.com
Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments
On Mon, 11 Nov 2013 13:41:05 +0100 Andreas Färber afaer...@suse.de wrote: Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy: This converts +foo/-foo to foo=on/foo=off respectively when QEMU parser is used for the command line options. -cpu parsers in x86 and other architectures should be unaffected by this change. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- util/qemu-option.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/util/qemu-option.c b/util/qemu-option.c index efcb5dc..6c8667c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char *params, if (strncmp(option, no, 2) == 0) { memmove(option, option+2, strlen(option+2)+1); pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, -, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), off); +} else if (strncmp(option, +, 1) == 0) { +memmove(option, option+1, strlen(option+1)+1); +pstrcpy(value, sizeof(value), on); } else { pstrcpy(value, sizeof(value), on); } This looks like an interesting idea! However this is much too big a change to just CC ppc folks on... Jan, I wonder if this might break slirp's hostfwd option? Not sure what other options potentially starting with '-' might be affected. Test cases would be a helpful way of demonstrating that this change does not have undesired side effects. on x86 there is several value fixups for compatibility reason and a manual value parsing in cpu_x86_parse_featurestr(), so above won't just work there. Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg -- Regards, Igor
[Qemu-devel] xsave instruction not passed through correctly on AMD hardware
Hi all, We've been seeing a problem lately running FreeBSD 9.1 and 9.2 (latest stable) which causes the guest to crash during boot when QEMU is run on an AMD processor with the 'xsave' flag set. To reproduce this behaviour: - Boot a FreeBSD 9.1 or 9.2 guest or even installation CD on an AMD processor with xsave enabled. Use '-cpu host'. - After the bootloader, the guest will crash almost immediately with the message 'kernel trap 12 with interrupts disabled'. This occurs before any disks are loaded, so it's not possible to get a memory dump from the guest OS for backtrace. - Boot again with '-cpu host,-xsave'. The guest should boot successfully. This was seen on AMD Opteron 6238 processor family, and does not affect our Opteron 6128s (due to lack of the xsave flag). We've also tested on an Intel Xeon E5-2640 processor which has the xsave flag set and verified that we do not see this behaviour. Based on this, I believe that the xsave instruction is not being correctly emulated on some hardware. Is this a known issue? Thanks in advance for looking, and please let me know if we can provide any more useful information to help diagnose/fix this. Best regards, Owen Tuz
Re: [Qemu-devel] [PATCH for-1.7 v2] block: Print its file name if backing file opening failed
Am 08.11.2013 um 04:26 hat Fam Zheng geschrieben: If backing file doesn't exist, the error message is confusing and misleading: $ qemu /tmp/a.qcow2 qemu: could not open disk image /tmp/a.qcow2: Could not open file: No such file or directory But... $ ls /tmp/a.qcow2 /tmp/a.qcow2 $ qemu-img info /tmp/a.qcow2 image: /tmp/a.qcow2 file format: qcow2 virtual size: 8.0G (8589934592 bytes) disk size: 196K cluster_size: 65536 backing file: /tmp/b.qcow2 Because... $ ls /tmp/b.qcow2 ls: cannot access /tmp/b.qcow2: No such file or directory This is not intuitive. It's better to have the missing file's name in the error message. With this patch: $ qemu-io -c 'read 0 512' /tmp/a.qcow2 qemu-io: can't open device /tmp/a.qcow2: Could not open backing file: Could not open '/stor/vm/arch.raw': No such file or directory no file open, try 'help open' Which is a little bit better. Signed-off-by: Fam Zheng f...@redhat.com Thanks, applied to the block branch. However, while this may be an improvement, it's certainly not what we'd want the error message to look like in the final state. Consider a chain with multiple backing files: qemu-system-x86_64: -drive file=/tmp/d.qcow2: could not open disk image /tmp/d.qcow2: Could not open backing file: Could not open backing file: Could not open backing file: Could not open backing file: Could not open '/tmp/blubber': No such file or directory Kevin
Re: [Qemu-devel] [PATCH v2] flatload: fix non-GOT relocations
Ping http://patchwork.ozlabs.org/patch/280764/ On Sat, Oct 5, 2013 at 7:46 AM, Corey J. Boyle co...@kansanian.com wrote: From: Corey J. Boyle co...@kansanian.com Use target address rather than host address when performing non-GOT relocations Signed-off-by: Corey J. Boyle co...@kansanian.com --- linux-user/flatload.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/flatload.c b/linux-user/flatload.c index 58f679e..ceb89bb 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -633,7 +633,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* Get the pointer's value. */ if (get_user_ual(addr, rp)) return -EFAULT; -addr = flat_get_addr_from_rp(rp, relval, flags, persistent); +addr = flat_get_addr_from_rp(addr, relval, flags, persistent); if (addr != 0) { /* * Do the relocation. PIC relocs in the data section are -- 1.7.9.5
Re: [Qemu-devel] Questions about Spice pv domUs
On Thu, 7 Nov 2013, Fabio Fantoni wrote: The xenfb code is here: hw/display/xenfb.c It is registered here: hw/i386/xen_machine_pv.c:xen_init_pv Thanks, then I must search on qemu code what xenstore parameters enable xenfb and after search on xen, right? Yes, or you can look at libxl__device_vfb_add.
[Qemu-devel] [RFC] target-arm: provide skeleton for a64 insn decoding
provide a skeleton for a64 instruction decoding in translate-a64.c, by dividing instructions into the classes defined by the ARM Architecture Reference Manual(DDI0487A_a) C3 Signed-off-by: Claudio Fontana claudio.font...@linaro.org --- The following patch has been started during Linaro Connect by me and Alex Bennee. The goal is to provide a decoder that is easy to match against the ARM Architecture Reference Manual. The plan here is a process of cleaning up / refactoring the SuSE patchset. We will be posting actual instruction implementations in the following days and weeks. However, as we currently have between 60-120 patches in the backlog we thought it would be worth getting the basic decoding skeleton agreed (and merged?) first to reduce any patch ordering problems and ease the reviewing burden. The first set of instruction patches will be based on Alexander Graf's 60 patch set after we have applied fixes, review comments and run some instruction testing using Peter Maydell's risu tool patched for aarch64: https://github.com/hw-claudio/risu-aarch64.git (master) We hope from there to quickly progress through the rest of the SUSE patch-set for getting a functional aarch64 linux-user setup before turning our attention to system emulation. target-arm/translate-a64.c | 367 - 1 file changed, 359 insertions(+), 8 deletions(-) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index f120088..7105728 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -107,17 +107,345 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp) s-is_jmp = DISAS_JUMP; } -static void real_unallocated_encoding(DisasContext *s) +static void unallocated_encoding(DisasContext *s) { -fprintf(stderr, Unknown instruction: %#x\n, s-insn); gen_exception_insn(s, 4, EXCP_UDEF); } -#define unallocated_encoding(s) do { \ -fprintf(stderr, unallocated encoding at line: %d\n, __LINE__); \ -real_unallocated_encoding(s); \ -} while (0) +#define unsupported_encoding(s, insn)\ +do { \ +qemu_log_mask(LOG_UNIMP,\ + %s:%d: unsupported instruction encoding 0x%08x, \ + __FILE__, __LINE__, insn);\ +unallocated_encoding(s);\ +} while (0); +/* + * the instruction disassembly implemented here matches + * the instruction encoding classifications in chapter 3 (C3) + * of the ARM Architecture Reference Manual (DDI0487A_a) + */ + +/* Unconditional branch (immediate) */ +static void disas_uncond_b_imm(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Compare branch (immediate) */ +static void disas_comp_b_imm(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Test branch (immediate) */ +static void disas_test_b_imm(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Conditional branch (immediate) */ +static void disas_cond_b_imm(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* System */ +static void disas_sys(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Exception generation */ +static void disas_exc(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Unconditional branch (register) */ +static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* C3.2 Branches, exception generating and system instructions */ +static void disas_b_exc_sys(DisasContext *s, uint32_t insn) +{ +switch (extract32(insn, 25, 7)) { +case 0x0a: case 0x4a: /* Unconditional branch (immediate) */ +disas_uncond_b_imm(s, insn); +break; +case 0x1a: case 0x5a: /* Compare branch (immediate) */ +disas_comp_b_imm(s, insn); +break; +case 0x1b: case 0x5b: /* Test branch (immediate) */ +disas_test_b_imm(s, insn); +break; +case 0x2a: /* Conditional branch (immediate) */ +disas_cond_b_imm(s, insn); +break; +case 0x6a: /* Exception generation / System */ +if (insn (1 24)) { +disas_sys(s, insn); +} else { +disas_exc(s, insn); +} +break; +case 0x6b: /* Unconditional branch (register) */ +disas_uncond_b_reg(s, insn); +break; +default: +unallocated_encoding(s); +break; +} +} + +/* Load/store exclusive */ +static void disas_ldst_excl(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Load register (literal) */ +static void disas_ld_lit(DisasContext *s, uint32_t insn) +{ +unsupported_encoding(s, insn); +} + +/* Load/store pair (all forms) */ +static void
Re: [Qemu-devel] xsave instruction not passed through correctly on AMD hardware
Il 11/11/2013 15:30, Owen Tuz ha scritto: Hi all, We've been seeing a problem lately running FreeBSD 9.1 and 9.2 (latest stable) which causes the guest to crash during boot when QEMU is run on an AMD processor with the 'xsave' flag set. To reproduce this behaviour: - Boot a FreeBSD 9.1 or 9.2 guest or even installation CD on an AMD processor with xsave enabled. Use '-cpu host'. - After the bootloader, the guest will crash almost immediately with the message 'kernel trap 12 with interrupts disabled'. This occurs before any disks are loaded, so it's not possible to get a memory dump from the guest OS for backtrace. - Boot again with '-cpu host,-xsave'. The guest should boot successfully. This is probably cause by the lightweight profiling XSAVE extension. KVM does not support it. Using -cpu host may show problems when the CPU has features that are not supported by KVM. It's possible though that this has been recently fixed. Please try branch next of git://git.kernel.org/pub/scm/virt/kvm/kvm.git and report back. Paolo
Re: [Qemu-devel] xsave instruction not passed through correctly on AMD hardware
Thanks, Paolo. We will test and let you know. I'm not familiar with LWP (some reading to do there) - are there any plans to support this in future, or is this just something that we're not interested in emulating? Best regards, Owen On Mon, Nov 11, 2013 at 3:25 PM, Paolo Bonzini pbonz...@redhat.com wrote: Il 11/11/2013 15:30, Owen Tuz ha scritto: Hi all, We've been seeing a problem lately running FreeBSD 9.1 and 9.2 (latest stable) which causes the guest to crash during boot when QEMU is run on an AMD processor with the 'xsave' flag set. To reproduce this behaviour: - Boot a FreeBSD 9.1 or 9.2 guest or even installation CD on an AMD processor with xsave enabled. Use '-cpu host'. - After the bootloader, the guest will crash almost immediately with the message 'kernel trap 12 with interrupts disabled'. This occurs before any disks are loaded, so it's not possible to get a memory dump from the guest OS for backtrace. - Boot again with '-cpu host,-xsave'. The guest should boot successfully. This is probably cause by the lightweight profiling XSAVE extension. KVM does not support it. Using -cpu host may show problems when the CPU has features that are not supported by KVM. It's possible though that this has been recently fixed. Please try branch next of git://git.kernel.org/pub/scm/virt/kvm/kvm.git and report back. Paolo
[Qemu-devel] [Bug 1243287] Re: [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail
dmidecode itself should probably be checking at runtime what cpu architecture it is running on so it can refuse to read /dev/mem on systems which it doesn't know it understands. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1243287 Title: [KVM/QEMU][ARM][SAUCY] fails to boot cloud-image due to host kvm fail Status in QEMU: Confirmed Bug description: On booting the cloud image using qemu/kvm fails with the following error: Cloud-init v. 0.7.3 running 'init' at Thu, 03 Oct 2013 16:45:21 +. Up 360.78 seconds. ci-info: +Net device info+ ci-info: ++--+---+---+---+ ci-info: | Device | Up | Address | Mask | Hw-Address | ci-info: ++--+---+---+---+ ci-info: | lo | True | 127.0.0.1 | 255.0.0.0 | . | ci-info: | eth0 | True | 10.0.2.15 | 255.255.255.0 | 52:54:00:12:34:56 | ci-info: ++--+---+---+---+ ci-info: ++Route info++ ci-info: +---+-+--+---+---+---+ ci-info: | Route | Destination | Gateway | Genmask | Interface | Flags | ci-info: +---+-+--+---+---+---+ ci-info: | 0 | 0.0.0.0 | 10.0.2.2 | 0.0.0.0 | eth0 | UG | ci-info: | 1 | 10.0.2.0 | 0.0.0.0 | 255.255.255.0 | eth0 | U | ci-info: +---+-+--+---+---+---+ error: kvm run failed Function not implemented /usr/lib/python2.7/dist- packages/cloudinit/sources/DataSourceAltCloud.py assumes that dmidecode command is availabe (ie it assumes that system is x86) on arm systems there is no dmidecode command so host kvm fails with the message error: kvm run failed Function not implemented The patch makes get_cloud_type() function return with UNKNOWN for ARM systems. I was able to boot the cloud-image on ARM after applying this change. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1243287/+subscriptions
[Qemu-devel] [PULL 1/6] Fix pc migration from qemu = 1.5
From: Cole Robinson crobi...@redhat.com The following commit introduced a migration incompatibility: commit 568f0690fd9aa4d39d84b04c1a5dbb53a915c3fe Author: David Gibson da...@gibson.dropbear.id.au Date: Thu Jun 6 18:48:49 2013 +1000 pci: Replace pci_find_domain() with more general pci_root_bus_path() The issue is that i440fx savevm idstr went from :00:00.0/I440FX to :00.0/I440FX. Unfortunately we are stuck with the breakage for 1.6 machine types. Add a compat property to maintain the busted idstr for the 1.6 machine types, but revert to the old style format for 1.7+, and = 1.5. Tested with migration from qemu 1.5, qemu 1.6, and qemu.git. Cc: qemu-sta...@nongnu.org Signed-off-by: Cole Robinson crobi...@redhat.com Reviewed-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- include/hw/i386/pc.h | 16 include/hw/pci-host/q35.h | 1 + hw/pci-host/piix.c| 9 - hw/pci-host/q35.c | 10 -- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 03cc0ba..57e8d16 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -260,6 +260,14 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); .driver = qemu32- TYPE_X86_CPU,\ .property = model,\ .value= stringify(3),\ +},{\ +.driver = i440FX-pcihost,\ +.property = short_root_bus,\ +.value= stringify(1),\ +},{\ +.driver = q35-pcihost,\ +.property = short_root_bus,\ +.value= stringify(1),\ } #define PC_COMPAT_1_5 \ @@ -296,6 +304,14 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); .driver = TYPE_X86_CPU,\ .property = pmu,\ .value = on,\ +},{\ +.driver = i440FX-pcihost,\ +.property = short_root_bus,\ +.value= stringify(0),\ +},{\ +.driver = q35-pcihost,\ +.property = short_root_bus,\ +.value= stringify(0),\ } #define PC_COMPAT_1_4 \ diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index aee91aa..309065f 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -61,6 +61,7 @@ typedef struct MCHPCIState { ram_addr_t above_4g_mem_size; uint64_t pci_hole64_size; PcGuestInfo *guest_info; +uint32_t short_root_bus; } MCHPCIState; typedef struct Q35PCIHost { diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index bad3953..edc974e 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -48,6 +48,7 @@ typedef struct I440FXState { PCIHostState parent_obj; PcPciInfo pci_info; uint64_t pci_hole64_size; +uint32_t short_root_bus; } I440FXState; #define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ @@ -720,13 +721,19 @@ static const TypeInfo i440fx_info = { static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { +I440FXState *s = I440FX_PCI_HOST_BRIDGE(host_bridge); + /* For backwards compat with old device paths */ -return ; +if (s-short_root_bus) { +return ; +} +return :00; } static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), +DEFINE_PROP_UINT32(short_root_bus, I440FXState, short_root_bus, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index b8feed1..c043998 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -61,8 +61,13 @@ static void q35_host_realize(DeviceState *dev, Error **errp) static const char *q35_host_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { -/* For backwards compat with old device paths */ -return ; +Q35PCIHost *s = Q35_HOST_DEVICE(host_bridge); + + /* For backwards compat with old device paths */ +if (s-mch.short_root_bus) { +return ; +} +return :00; } static void q35_host_get_pci_hole_start(Object *obj, Visitor *v, @@ -124,6 +129,7 @@ static Property mch_props[] = { MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT), DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35PCIHost, mch.pci_hole64_size, DEFAULT_PCI_HOLE64_SIZE), +DEFINE_PROP_UINT32(short_root_bus, Q35PCIHost, mch.short_root_bus, 0), DEFINE_PROP_END_OF_LIST(), }; -- MST
[Qemu-devel] [PULL 5/6] Revert hw/pci: partially handle pci master abort
From: Marcel Apfelbaum marce...@redhat.com This reverts commit a53ae8e934cd54686875b5bcfc2f434244ee55d6. The patch being reverted introduced a low-priority memory region covering all 64 bit pci address space. This exposed the following bugs elsewhere in the code: 1. Some memory regions have INT64_MAX size, where the intent was all 64 bit address space. This results in a sub-page region, should be UINT64_MAX. 2. page table rendering in exec.c ignores physical address bits above TARGET_PHYS_ADDR_SPACE_BITS. Access outside this range (e.g. from device DMA, or gdb stub) ends up with a wrong region. Registering a region outside this range leads to page table corruption. 3. Some regions overlap PCI hole and have same priority. This only works as long as no device uses the overlapping address. It doesn't look like we can resolve all issues in time for 1.7. Let's fix the bugs first and apply afterwards for 1.8. Signed-off-by: Marcel Apfelbaum marce...@redhat.com Reviewed-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- include/hw/pci/pci_bus.h | 1 - hw/pci/pci.c | 26 -- 2 files changed, 27 deletions(-) diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 2ad5edb..9df1788 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -23,7 +23,6 @@ struct PCIBus { PCIDevice *parent_dev; MemoryRegion *address_space_mem; MemoryRegion *address_space_io; -MemoryRegion master_abort_mem; QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */ QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */ diff --git a/hw/pci/pci.c b/hw/pci/pci.c index a98c8a0..ed32059 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -283,24 +283,6 @@ const char *pci_root_bus_path(PCIDevice *dev) return rootbus-qbus.name; } -static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size) -{ - return -1ULL; -} - -static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) -{ -} - -static const MemoryRegionOps master_abort_mem_ops = { -.read = master_abort_mem_read, -.write = master_abort_mem_write, -.endianness = DEVICE_LITTLE_ENDIAN, -}; - -#define MASTER_ABORT_MEM_PRIORITY INT_MIN - static void pci_bus_init(PCIBus *bus, DeviceState *parent, const char *name, MemoryRegion *address_space_mem, @@ -312,14 +294,6 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent, bus-address_space_mem = address_space_mem; bus-address_space_io = address_space_io; - -memory_region_init_io(bus-master_abort_mem, OBJECT(bus), - master_abort_mem_ops, bus, pci-master-abort, - memory_region_size(bus-address_space_mem)); -memory_region_add_subregion_overlap(bus-address_space_mem, -0, bus-master_abort_mem, -MASTER_ABORT_MEM_PRIORITY); - /* host bridge */ QLIST_INIT(bus-child); -- MST
[Qemu-devel] [PULL 0/6] pci, pc, virtio bug fixes for 1.7
The following changes since commit c2d30667760e3d7b81290d801e567d4f758825ca: rtc: remove dead SQW IRQ code (2013-11-05 20:04:03 -0800) are available in the git repository at: git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_anthony for you to fetch changes up to ef9e455d645bed6d2360cd658dc00ca11a849877: Revert exec: limit system memory size (2013-11-10 15:11:01 +0200) pci, pc, virtio bug fixes This reverts PCI master abort support - we'll want it eventually but it exposes too many core bugs to be safe for 1.7. This also reverts a recent exec.c change that was an attempt to work-around some of these core bugs. Also included are small fixes in pc and virtio, and a core loader fix for PPC bamboo. Signed-off-by: Michael S. Tsirkin m...@redhat.com Cole Robinson (1): Fix pc migration from qemu = 1.5 Jason Wang (1): virtio-net: only delete bh that existed Marcel Apfelbaum (1): Revert hw/pci: partially handle pci master abort Michael S. Tsirkin (3): acpi-build: disable with -no-acpi loader: drop return value for rom_add_blob_fixed Revert exec: limit system memory size include/hw/i386/pc.h | 16 include/hw/loader.h | 2 +- include/hw/pci-host/q35.h | 1 + include/hw/pci/pci_bus.h | 1 - exec.c| 7 +-- hw/i386/acpi-build.c | 5 + hw/net/virtio-net.c | 2 +- hw/pci-host/piix.c| 9 - hw/pci-host/q35.c | 10 -- hw/pci/pci.c | 26 -- hw/ppc/ppc440_bamboo.c| 3 ++- 11 files changed, 43 insertions(+), 39 deletions(-) -- MST
[Qemu-devel] [PULL 6/6] Revert exec: limit system memory size
This reverts commit 818f86b88394b7b2b59d313e51043fe15a8004db. This was a work-around for bugs elsewhere in the system, exposed by commit a53ae8e934cd54686875b5bcfc2f434244ee55d6: hw/pci: partially handle pci master abort since that's reverted now, the work-around is not required for 1.7 anymore. The proper fix is supporting full 64 bit addresses in the radix tree. Signed-off-by: Michael S. Tsirkin m...@redhat.com Tested-by: Marcel Apfelbaum marce...@redhat.com --- exec.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/exec.c b/exec.c index 79610ce..b453713 100644 --- a/exec.c +++ b/exec.c @@ -1741,12 +1741,7 @@ void address_space_destroy_dispatch(AddressSpace *as) static void memory_map_init(void) { system_memory = g_malloc(sizeof(*system_memory)); - -assert(TARGET_PHYS_ADDR_SPACE_BITS = 64); - -memory_region_init(system_memory, NULL, system, - TARGET_PHYS_ADDR_SPACE_BITS == 64 ? - UINT64_MAX : (0x1ULL TARGET_PHYS_ADDR_SPACE_BITS)); +memory_region_init(system_memory, NULL, system, INT64_MAX); address_space_init(address_space_memory, system_memory, memory); system_io = g_malloc(sizeof(*system_io)); -- MST
Re: [Qemu-devel] [PATCH v3 0/2] block/drive-mirror: Check for NULL backing_hd
Am 06.11.2013 um 19:50 hat Max Reitz geschrieben: It should be possible to execute the QMP drive-mirror command in none sync mode and absolute-paths mode even for block devices lacking a backing file. absolute-paths does in fact not require a backing file to be present, as can be seen from the top sync mode code path. top basically states that the device should indeed have a backing file - however, the current code catches the case if it doesn't and then simply treats it as full sync mode, creating a target image without a backing file (in absolute-paths mode). Thus, absolute-paths does not imply the target file must indeed have a backing file. Therefore, the target file may be left unbacked in case of none sync mode as well, if the specified device is not backed either. Currently, qemu will crash trying to dereference the backing file pointer since it assumes that it will always be non-NULL in that case (none with absolute-paths). The first patch in this series adds a check whether the specified block device is backed or not (creating an unbacked target image, if required); the second patch adds a test case for mirroring unbacked block devices. v2: - patch 1: Reuse an already existing codepath to create an unbacked target image instead of introducing a new one (based on Fam's comment). - patch 2: Incorporated test case into 041 instead of creating a new file (according to Xia's and Paolo's comments). Max Reitz (2): block/drive-mirror: Check for NULL backing_hd qemu-iotests: Extend 041 for unbacked mirroring blockdev.c | 4 +++- tests/qemu-iotests/041 | 25 + tests/qemu-iotests/041.out | 4 ++-- 3 files changed, 30 insertions(+), 3 deletions(-) Thanks, applied to the block branch. Kevin
[Qemu-devel] [PULL 3/6] acpi-build: disable with -no-acpi
QEMU will currently crash if started with -no-acpi flag since acpi build code probes the PM device which isn't present in this configuration. To fix, don't expose ACPI tables to guest when acpi has been disabled from command line. Fixes LP# 1248854 https://bugs.launchpad.net/qemu/+bug/1248854 Reported-by: chao zhou chao.z...@intel.com Signed-off-by: Michael S. Tsirkin m...@redhat.com Reviewed-by: Gerd Hoffmann kra...@redhat.com --- hw/i386/acpi-build.c | 5 + 1 file changed, 5 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 6cfa044..486e705 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1182,6 +1182,11 @@ void acpi_setup(PcGuestInfo *guest_info) return; } +if (!acpi_enabled) { +ACPI_BUILD_DPRINTF(3, ACPI disabled. Bailing out.\n); +return; +} + build_state = g_malloc0(sizeof *build_state); build_state-guest_info = guest_info; -- MST
Re: [Qemu-devel] [PATCH] qapi-schema: Update description for NewImageMode
Am 07.11.2013 um 19:47 hat Max Reitz geschrieben: If the NewImageMode is absolute-paths but no backing file is available (e.g., when mirroring a device with an unbacked image), the target image will not be backed either. This patch updates the documentation in qapi-schema.json accordingly. Signed-off-by: Max Reitz mre...@redhat.com --- Follow-up to: - block/drive-mirror: Check for NULL backing_hd Thanks, applied to the block branch. Kevin
[Qemu-devel] [PULL 2/6] virtio-net: only delete bh that existed
From: Jason Wang jasow...@redhat.com We delete without check whether it existed during exit. This will lead NULL pointer deference since it was created conditionally depends on guest driver status and features. So add a check of existence before trying to delete it. Cc: qemu-sta...@nongnu.org Signed-off-by: Jason Wang jasow...@redhat.com Reviewed-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/net/virtio-net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 22dbd05..ae51d96 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1601,7 +1601,7 @@ static int virtio_net_device_exit(DeviceState *qdev) if (q-tx_timer) { timer_del(q-tx_timer); timer_free(q-tx_timer); -} else { +} else if (q-tx_bh) { qemu_bh_delete(q-tx_bh); } } -- MST
[Qemu-devel] [PULL 4/6] loader: drop return value for rom_add_blob_fixed
rom_add_blob never fails, and neither does rom_add_blob_fixed, so there's no need to return value from it. In fact, rom_add_blob_fixed was erroneously returning -1 unconditionally which made the only system that checked the return value -M bamboo fail to start. Drop the return value and drop checks from ppc440_bamboo to fix this failure. Reported-by: Alexander Graf ag...@suse.de Signed-off-by: Michael S. Tsirkin m...@redhat.com --- include/hw/loader.h| 2 +- hw/ppc/ppc440_bamboo.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/hw/loader.h b/include/hw/loader.h index 58eca98..7a23d6b 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -55,7 +55,7 @@ void do_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_file_fixed(_f, _a, _i) \ rom_add_file(_f, NULL, _a, _i) #define rom_add_blob_fixed(_f, _b, _l, _a) \ -(rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL) ? 0 : -1) +rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL) #define PC_ROM_MIN_VGA 0xc #define PC_ROM_MIN_OPTION 0xc8000 diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index 655e499..67597df 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -110,8 +110,9 @@ static int bamboo_load_device_tree(hwaddr addr, qemu_devtree_setprop_cell(fdt, /cpus/cpu@0, timebase-frequency, tb_freq); -ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); +rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); g_free(fdt); +return 0; out: -- MST
Re: [Qemu-devel] [PATCH] MAINTAINERS: add block tree repo URLs
Am 06.11.2013 um 16:06 hat Stefan Hajnoczi geschrieben: Signed-off-by: Stefan Hajnoczi stefa...@redhat.com --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 77edacf..6c63548 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -639,6 +639,8 @@ S: Supported F: block* F: block/ F: hw/block/ +T: git git://repo.or.cz/qemu/kevin.git block +T: git git://github.com/stefanha/qemu.git block Character Devices M: Anthony Liguori aligu...@amazon.com Thanks, applied to the block branch. By the way, are you planning to send a v2 of the other MAINTAINERS patch that assigns maintainers to individual block drivers? Kevin
[Qemu-devel] [PATCH RFC 4/5] exec: extend skip field to 3 bits
13 bits left for pointer, which seems enough. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exec.c b/exec.c index 7e512db..39f76ee 100644 --- a/exec.c +++ b/exec.c @@ -84,9 +84,9 @@ typedef struct PhysPageEntry PhysPageEntry; struct PhysPageEntry { /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */ -uint16_t skip : 1; +uint16_t skip : 3; /* index into phys_sections (!skip) or phys_map_nodes (skip) */ -uint16_t ptr : 15; +uint16_t ptr : 13; }; /* Size of the L2 (and L3, etc) page tables. */ @@ -134,7 +134,7 @@ typedef struct PhysPageMap { static PhysPageMap *prev_map; static PhysPageMap next_map; -#define PHYS_MAP_NODE_NIL (((uint16_t)~0) 1) +#define PHYS_MAP_NODE_NIL (((uint16_t)~0) 3) static void io_mem_init(void); static void memory_map_init(void); -- MST
[Qemu-devel] [PATCH RFC 2/5] exec: make address spaces 64-bit wide
From: Paolo Bonzini pbonz...@redhat.com As an alternative to commit 818f86b (exec: limit system memory size, 2013-11-04) let's just make all address spaces 64-bit wide. This eliminates problems with phys_page_find ignoring bits above TARGET_PHYS_ADDR_SPACE_BITS and address_space_translate_internal consequently messing up the computations. In Luiz's reported crash, at startup gdb attempts to read from address 0xffe6 to 0x inclusive. The region it gets is the newly introduced master abort region, which is as big as the PCI address space (see pci_bus_init). Due to a typo that's only 2^63-1, not 2^64. But we get it anyway because phys_page_find ignores the upper bits of the physical address. In address_space_translate_internal then diff = int128_sub(section-mr-size, int128_make64(addr)); *plen = int128_get64(int128_min(diff, int128_make64(*plen))); diff becomes negative, and int128_get64 booms. The size of the PCI address space region should be fixed anyway. Reported-by: Luiz Capitulino lcapitul...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- exec.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/exec.c b/exec.c index 9e2fc4b..d5ce3da 100644 --- a/exec.c +++ b/exec.c @@ -89,7 +89,7 @@ struct PhysPageEntry { }; /* Size of the L2 (and L3, etc) page tables. */ -#define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS +#define ADDR_SPACE_BITS 64 #define P_L2_BITS 10 #define P_L2_SIZE (1 P_L2_BITS) @@ -1750,11 +1750,7 @@ static void memory_map_init(void) { system_memory = g_malloc(sizeof(*system_memory)); -assert(ADDR_SPACE_BITS = 64); - -memory_region_init(system_memory, NULL, system, - ADDR_SPACE_BITS == 64 ? - UINT64_MAX : (0x1ULL ADDR_SPACE_BITS)); +memory_region_init(system_memory, NULL, system, UINT64_MAX); address_space_init(address_space_memory, system_memory, memory); system_io = g_malloc(sizeof(*system_io)); -- MST
[Qemu-devel] [PATCH RFC 3/5] exec: relace leaf with skip
In preparation for dynamic page support, rename is_leaf field to skip, telling us how many bits to skip to next level. Set to 0 for leaf. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- exec.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/exec.c b/exec.c index d5ce3da..7e512db 100644 --- a/exec.c +++ b/exec.c @@ -83,8 +83,9 @@ int use_icount; typedef struct PhysPageEntry PhysPageEntry; struct PhysPageEntry { -uint16_t is_leaf : 1; - /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */ +/* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */ +uint16_t skip : 1; + /* index into phys_sections (!skip) or phys_map_nodes (skip) */ uint16_t ptr : 15; }; @@ -164,7 +165,7 @@ static uint16_t phys_map_node_alloc(void) assert(ret != PHYS_MAP_NODE_NIL); assert(ret != next_map.nodes_nb_alloc); for (i = 0; i P_L2_SIZE; ++i) { -next_map.nodes[ret][i].is_leaf = 0; +next_map.nodes[ret][i].skip = 1; next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; } return ret; @@ -178,12 +179,12 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, int i; hwaddr step = (hwaddr)1 (level * P_L2_BITS); -if (!lp-is_leaf lp-ptr == PHYS_MAP_NODE_NIL) { +if (lp-skip lp-ptr == PHYS_MAP_NODE_NIL) { lp-ptr = phys_map_node_alloc(); p = next_map.nodes[lp-ptr]; if (level == 0) { for (i = 0; i P_L2_SIZE; i++) { -p[i].is_leaf = 1; +p[i].skip = 0; p[i].ptr = PHYS_SECTION_UNASSIGNED; } } @@ -194,7 +195,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, while (*nb lp p[P_L2_SIZE]) { if ((*index (step - 1)) == 0 *nb = step) { -lp-is_leaf = true; +lp-skip = 0; lp-ptr = leaf; *index += step; *nb -= step; @@ -221,7 +222,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index, PhysPageEntry *p; int i; -for (i = P_L2_LEVELS - 1; i = 0 !lp.is_leaf; i--) { +for (i = P_L2_LEVELS - 1; i = 0 lp.skip; i -= lp.skip) { if (lp.ptr == PHYS_MAP_NODE_NIL) { return sections[PHYS_SECTION_UNASSIGNED]; } @@ -1644,7 +1645,7 @@ static void mem_begin(MemoryListener *listener) AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1); -d-phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 }; +d-phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; d-as = as; as-next_dispatch = d; } -- MST
[Qemu-devel] [PATCH RFC 1/5] split definitions for exec.c and translate-all.c radix trees
From: Paolo Bonzini pbonz...@redhat.com The exec.c and translate-all.c radix trees are quite different, and the exec.c one in particular is not limited to the CPU---it can be used also by devices that do DMA, and in that case the address space is not limited to TARGET_PHYS_ADDR_SPACE_BITS bits. We want to make exec.c's radix trees 64-bit wide. As a first step, stop sharing the constants between exec.c and translate-all.c. exec.c gets P_L2_* constants, translate-all.c gets V_L2_*, for consistency with the existing V_L1_* symbols. Though actually in the softmmu case translate-all.c is also indexed by physical addresses... This patch has no semantic change. Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- translate-all.h | 7 --- exec.c | 29 + translate-all.c | 32 ++-- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/translate-all.h b/translate-all.h index 5c38819..f7e5932 100644 --- a/translate-all.h +++ b/translate-all.h @@ -19,13 +19,6 @@ #ifndef TRANSLATE_ALL_H #define TRANSLATE_ALL_H -/* Size of the L2 (and L3, etc) page tables. */ -#define L2_BITS 10 -#define L2_SIZE (1 L2_BITS) - -#define P_L2_LEVELS \ -(((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1) - /* translate-all.c */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); void cpu_unlink_tb(CPUState *cpu); diff --git a/exec.c b/exec.c index b453713..9e2fc4b 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,15 @@ struct PhysPageEntry { uint16_t ptr : 15; }; -typedef PhysPageEntry Node[L2_SIZE]; +/* Size of the L2 (and L3, etc) page tables. */ +#define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS + +#define P_L2_BITS 10 +#define P_L2_SIZE (1 P_L2_BITS) + +#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1) + +typedef PhysPageEntry Node[P_L2_SIZE]; struct AddressSpaceDispatch { /* This is a multi-level map on the physical address space. @@ -155,7 +163,7 @@ static uint16_t phys_map_node_alloc(void) ret = next_map.nodes_nb++; assert(ret != PHYS_MAP_NODE_NIL); assert(ret != next_map.nodes_nb_alloc); -for (i = 0; i L2_SIZE; ++i) { +for (i = 0; i P_L2_SIZE; ++i) { next_map.nodes[ret][i].is_leaf = 0; next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; } @@ -168,13 +176,13 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, { PhysPageEntry *p; int i; -hwaddr step = (hwaddr)1 (level * L2_BITS); +hwaddr step = (hwaddr)1 (level * P_L2_BITS); if (!lp-is_leaf lp-ptr == PHYS_MAP_NODE_NIL) { lp-ptr = phys_map_node_alloc(); p = next_map.nodes[lp-ptr]; if (level == 0) { -for (i = 0; i L2_SIZE; i++) { +for (i = 0; i P_L2_SIZE; i++) { p[i].is_leaf = 1; p[i].ptr = PHYS_SECTION_UNASSIGNED; } @@ -182,9 +190,9 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, } else { p = next_map.nodes[lp-ptr]; } -lp = p[(*index (level * L2_BITS)) (L2_SIZE - 1)]; +lp = p[(*index (level * P_L2_BITS)) (P_L2_SIZE - 1)]; -while (*nb lp p[L2_SIZE]) { +while (*nb lp p[P_L2_SIZE]) { if ((*index (step - 1)) == 0 *nb = step) { lp-is_leaf = true; lp-ptr = leaf; @@ -218,7 +226,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index, return sections[PHYS_SECTION_UNASSIGNED]; } p = nodes[lp.ptr]; -lp = p[(index (i * L2_BITS)) (L2_SIZE - 1)]; +lp = p[(index (i * P_L2_BITS)) (P_L2_SIZE - 1)]; } return sections[lp.ptr]; } @@ -1741,7 +1749,12 @@ void address_space_destroy_dispatch(AddressSpace *as) static void memory_map_init(void) { system_memory = g_malloc(sizeof(*system_memory)); -memory_region_init(system_memory, NULL, system, INT64_MAX); + +assert(ADDR_SPACE_BITS = 64); + +memory_region_init(system_memory, NULL, system, + ADDR_SPACE_BITS == 64 ? + UINT64_MAX : (0x1ULL ADDR_SPACE_BITS)); address_space_init(address_space_memory, system_memory, memory); system_io = g_malloc(sizeof(*system_io)); diff --git a/translate-all.c b/translate-all.c index aeda54d..1c63d78 100644 --- a/translate-all.c +++ b/translate-all.c @@ -96,12 +96,16 @@ typedef struct PageDesc { # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS #endif +/* Size of the L2 (and L3, etc) page tables. */ +#define V_L2_BITS 10 +#define V_L2_SIZE (1 V_L2_BITS) + /* The bits remaining after N lower levels of page tables. */ #define V_L1_BITS_REM \ -((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS) +((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS) #if V_L1_BITS_REM 4 -#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
[Qemu-devel] [PATCH RFC 5/5] exec: memory radix tree page level compression
At the moment, memory radix tree is already variable width, but it can only skip the low bits of address. This is efficient if we have huge memory regions but inefficient if we are only using a tiny portion of the address space. After we have built up the map, it's a simple matter to detect configurations where a single L2 entry is valid. We can them speed up the lookup by skipping one or more levels. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- exec.c | 71 ++ 1 file changed, 71 insertions(+) diff --git a/exec.c b/exec.c index 39f76ee..3ec6c2c 100644 --- a/exec.c +++ b/exec.c @@ -216,6 +216,75 @@ static void phys_page_set(AddressSpaceDispatch *d, phys_page_set_level(d-phys_map, index, nb, leaf, P_L2_LEVELS - 1); } +/* Compact a non leaf page entry. Simply detect that the entry has a single child, + * and update our entry so we can skip it and go directly to the destination. + */ +static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted) +{ +unsigned valid_ptr = P_L2_SIZE; +int valid = 0; +PhysPageEntry *p; +int i; + +if (lp-ptr == PHYS_MAP_NODE_NIL || test_and_set_bit(lp-ptr, compacted)) { +return; +} + +set_bit(lp-ptr, compacted); + +p = nodes[lp-ptr]; +for (i = 0; i P_L2_SIZE; i++) { +if (p[i].ptr == PHYS_MAP_NODE_NIL) { +continue; +} + +valid_ptr = i; +valid++; +if (p[i].skip) { +phys_page_compact(p[i], nodes, compacted); +} +} + +/* We can only compress if there's only one child. */ +if (valid != 1) { +return; +} + +assert(valid_ptr P_L2_SIZE); + +/* Don't compress if it won't fit in the # of bits we have. */ +if (lp-skip + p[valid_ptr].skip = (1 3)) { +return; +} + +lp-ptr = p[valid_ptr].ptr; +if (!p[valid_ptr].skip) { +/* If our only child is a leaf, make this a leaf. */ +/* By design, we should have made this node a leaf to begin with so we + * should never reach here. + * But since it's so simple to handle this, let's do it just in case we + * change this rule. + */ +lp-skip = 0; +} else { +lp-skip += p[valid_ptr].skip; +} +} + +static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb) +{ +DECLARE_BITMAP(compacted, nodes_nb); +int i; + +return; + +for (i = 0; i next_map.nodes_nb; ++i) { +if (d-phys_map.skip) { +phys_page_compact(d-phys_map, d-nodes, compacted); +} +} +} + static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index, Node *nodes, MemoryRegionSection *sections) { @@ -1659,6 +1728,8 @@ static void mem_commit(MemoryListener *listener) next-nodes = next_map.nodes; next-sections = next_map.sections; +phys_page_compact_all(next, next_map.nodes_nb); + as-dispatch = next; g_free(cur); } -- MST
Re: [Qemu-devel] [PATCH v3 0/2] COW: Speed up writes
Am 06.11.2013 um 16:59 hat Charlie Shepherd geschrieben: v3: - Refix cow_update_bitmap and squash patches 1 3 together to ensuring that we only flush if necessary, patch 1 on its own would change this causing a regression. v2: - Fix bit position calculations in cow_update_bitmap - Add necessary check in cow_set_bits Following on from Paolo's commits 26ae980 and 276cbc7, this patchset implements some changes he recommended earlier which I didn't previously have time to do while on GSoC. Charlie Shepherd (2): COW: Speed up writes COW: Extend checking allocated bits to beyond one sector block/cow.c | 123 1 file changed, 75 insertions(+), 48 deletions(-) Thanks, applied to the block-next branch for 1.8. Your lines in the commit message were a bit too long, please try to have a line break after ~72 characters in future patches (I fixed it up manually for this one). Kevin
Re: [Qemu-devel] [PATCH 0/2] exec: alternative fix for master abort woes
On Thu, Nov 07, 2013 at 06:29:40PM +0100, Paolo Bonzini wrote: Il 07/11/2013 17:47, Michael S. Tsirkin ha scritto: That's on kvm with 52 bit address. But where I would be concerned is systems with e.g. 36 bit address space where we are doubling the cost of the lookup. E.g. try i386 and not x86_64. Tried now... P_L2_LEVELS pre-patch post-patch i386 3 6 x86_64 4 6 I timed the inl_from_qemu test of vmexit.flat with both KVM and TCG. With TCG there's indeed a visible penalty of 20 cycles for i386 and 10 for x86_64 (you can extrapolate to 30 cycles for TARGET_PHYS_ADDR_SPACE_BITS=32 targets). So how did you measure this exactly? These can be more or less entirely ascribed to phys_page_find: TCG | KVM pre-patch post-patch | pre-patch post-patch phys_page_find(i386) 13% 25%| 0.6% 1% inl_from_qemu cycles(i386)153 173| ~12000 ~12000 phys_page_find(x86_64)18% 25%| 0.8% 1% inl_from_qemu cycles(x86_64) 163 173| ~12000 ~12000 Thus this patch costs 0.4% in the worst case for KVM, 12% in the worst case for TCG. The cycle breakdown is: 60 phys_page_find 28 access_with_adjusted_size 24 address_space_translate_internal 20 address_space_rw 13 io_mem_read 11 address_space_translate 9 memory_region_read_accessor 6 memory_region_access_valid 4 helper_inl 4 memory_access_size 3 cpu_inl (This run reported 177 cycles per access; the total is 182 due to rounding). It is probably possible to shave at least 10 cycles from the functions below, or to make the depth of the tree dynamic so that you would save even more compared to 1.6.0. Also, compiling with -fstack-protector instead of -fstack-protector-all, as suggested a while ago by rth, is already giving a savings of 20 cycles. And of course, if this were a realistic test, KVM's 60x penalty would be a severe problem---but it isn't, because this is not a realistic setting. Paolo
[Qemu-devel] audit needed for signal handlers
Quick - identify the bug in this code (from ui/curses.c): static void curses_winch_handler(int signum) { struct winsize { unsigned short ws_row; unsigned short ws_col; unsigned short ws_xpixel; /* unused */ unsigned short ws_ypixel; /* unused */ } ws; /* terminal size changed */ if (ioctl(1, TIOCGWINSZ, ws) == -1) return; resize_term(ws.ws_row, ws.ws_col); curses_calc_pad(); invalidate = 1; /* some systems require this */ signal(SIGWINCH, curses_winch_handler); } Here's a hint: ioctl() can clobber errno. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. More reading on the topic: https://plus.google.com/+LennartPoetteringTheOneAndOnly/posts/gHSscCJkakd I have not done a full audit of qemu's signal handlers, so much as a quick look to see if I could find violations; it was surprisingly easy to find a bad example. A signal handler that resets the signal to SIG_DFL then calls raise() is exempt from caring about errno, but any signal handler that can fall through to the end and return execution to the caller MUST ensure that errno is left unchanged, for errno to be useful in the remaining body of code. Which is why the best signal handlers tend to be the one that only flag a volatile variable that is later checked at safe points of execution, rather than trying to make complex calls from within the handler context. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH RFC 0/5] making address spaces 64 bit wide
Il 11/11/2013 17:40, Michael S. Tsirkin ha scritto: At the moment, exec ignores high bits in each address, for efficiency. This is incorrect: devices can do full 64 bit DMA, it's only the CPU that is limited by target address space. Using full 64 bit addresses was clocked at 12% performance hit on a microbenchmark. To solve, teach pagetables to skip bits at any level and not just the lowest level. This should solve the performance problem (only one line of code changed on the data path). I'm still trying to figure out how to measure speed properly with TCG, sending this out for early feedback and flames. I used this: x86_64-softmmu/qemu-system-x86_64 -kernel ../../kvm-unit-tests/x86/vmexit.flat -serial stdio -device isa-debug-exit,iobase=0xf4 with only one test enabled (I tried both inl_from_qemu and inl_from_pmtimer) and with roughly the same inlining of the inb %dx, %al instruction that you suggested earlier on the mailing list. Paolo
[Qemu-devel] [PATCH v2 02/19] bsd-user: add HOST_ABI_DIR for the various *BSD dependent code.
This change adds HOST_ABI_DIR (similar to TARGET_ABI_DIR) so the various BSD OS dependent code can be seperated into its own directories rather than using #ifdef's. Signed-off-by: Stacey Son s...@freebsd.org --- Makefile.target |3 ++- configure | 11 +++ 2 files changed, 13 insertions(+), 1 deletions(-) diff --git a/Makefile.target b/Makefile.target index af6ac7e..82ae8cb 100644 --- a/Makefile.target +++ b/Makefile.target @@ -99,7 +99,8 @@ endif #CONFIG_LINUX_USER ifdef CONFIG_BSD_USER -QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) +QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \ +-I$(SRC_PATH)/bsd-user/$(HOST_ABI_DIR) obj-y += bsd-user/ obj-y += gdbstub.o user-exec.o diff --git a/configure b/configure index 91372f9..14571c6 100755 --- a/configure +++ b/configure @@ -449,6 +449,9 @@ fi # OS specific +# host *BSD for user mode +HOST_ABI_DIR= + case $targetos in CYGWIN*) mingw32=yes @@ -473,12 +476,14 @@ FreeBSD) audio_possible_drivers=oss sdl esd pa # needed for kinfo_getvmmap(3) in libutil.h LIBS=-lutil $LIBS + HOST_ABI_DIR=freebsd ;; DragonFly) bsd=yes make=${MAKE-gmake} audio_drv_list=oss audio_possible_drivers=oss sdl esd pa + HOST_ABI_DIR=dragonfly ;; NetBSD) bsd=yes @@ -486,12 +491,14 @@ NetBSD) audio_drv_list=oss audio_possible_drivers=oss sdl esd oss_lib=-lossaudio + HOST_ABI_DIR=netbsd ;; OpenBSD) bsd=yes make=${MAKE-gmake} audio_drv_list=sdl audio_possible_drivers=sdl esd + HOST_ABI_DIR=openbsd ;; Darwin) bsd=yes @@ -510,6 +517,7 @@ Darwin) # Disable attempts to use ObjectiveC features in os/object.h since they # won't work when we're compiling with gcc as a C compiler. QEMU_CFLAGS=-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS + HOST_ABI_DIR=darwin ;; SunOS) solaris=yes @@ -4471,6 +4479,9 @@ if [ $TARGET_ABI_DIR = ]; then TARGET_ABI_DIR=$TARGET_ARCH fi echo TARGET_ABI_DIR=$TARGET_ABI_DIR $config_target_mak +if [ $HOST_ABI_DIR != ]; then +echo HOST_ABI_DIR=$HOST_ABI_DIR $config_target_mak +fi case $target_name in i386|x86_64) if test $xen = yes -a $target_softmmu = yes ; then -- 1.7.8
Re: [Qemu-devel] xsave instruction not passed through correctly on AMD hardware
Il 11/11/2013 16:43, Owen Tuz ha scritto: Thanks, Paolo. We will test and let you know. I'm not familiar with LWP (some reading to do there) - are there any plans to support this in future, or is this just something that we're not interested in emulating? Given Linux does not support LWP, and AMD is not really trying to compete with Intel anymore, it is quite unlikely that KVM will support LWP in the future. Paolo
Re: [Qemu-devel] audit needed for signal handlers
On Mon, Nov 11, 2013 at 8:50 AM, Eric Blake ebl...@redhat.com wrote: Quick - identify the bug in this code (from ui/curses.c): static void curses_winch_handler(int signum) { struct winsize { unsigned short ws_row; unsigned short ws_col; unsigned short ws_xpixel; /* unused */ unsigned short ws_ypixel; /* unused */ } ws; /* terminal size changed */ if (ioctl(1, TIOCGWINSZ, ws) == -1) return; resize_term(ws.ws_row, ws.ws_col); curses_calc_pad(); invalidate = 1; /* some systems require this */ signal(SIGWINCH, curses_winch_handler); } Here's a hint: ioctl() can clobber errno. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. Isn't this precisely why EINTR exists? Regards, Anthony Liguori More reading on the topic: https://plus.google.com/+LennartPoetteringTheOneAndOnly/posts/gHSscCJkakd I have not done a full audit of qemu's signal handlers, so much as a quick look to see if I could find violations; it was surprisingly easy to find a bad example. A signal handler that resets the signal to SIG_DFL then calls raise() is exempt from caring about errno, but any signal handler that can fall through to the end and return execution to the caller MUST ensure that errno is left unchanged, for errno to be useful in the remaining body of code. Which is why the best signal handlers tend to be the one that only flag a volatile variable that is later checked at safe points of execution, rather than trying to make complex calls from within the handler context. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org
[Qemu-devel] [PATCH v2 10/19] bsd-user: add support for file system related system calls
This change adds support or stubs for file system (except stat) related system calls including read(2), pread(2), readv(2), write(2), pwrite(2), writev(2), pwritev(2), open(2), openat(2), close(2), closefrom(2), revoke(2), access(2), eaccess(2), faccessat(2), chdir(2), fchdir(2), rename(2), renameat(2), link(2), linkat(2), unlink(2), unlinkat(2), mkdir(2), mkdirat(2), rmdir(2), __getcwd(), dup(2), dup2(2), truncate(2), ftruncate(2), acct(2), sync(2), mount(2), nmount(2), symlink(2), symlinkat(2), readlink(2), readlinkat(2), chmod(2), fchmod(2), lchmod(2), fchmodat(2), mknod(2), mknodat(2), chown(2), fchown(2), lchown(2), fchownat(2), chflags(2), lchflags(2), fchflags(2), chroot(2), flock(2), mkfifo(2), mkfifoat(2), pathconf(2), lpathconf(2), fpathconf(2), undelete(2), poll(2), lseek(2), pipe(2), swapon(2), swapoff(2), and the undocumented openbsd_poll() and freebsd6_*() system calls. Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/bsd-file.h | +++ bsd-user/qemu.h | 36 ++ bsd-user/syscall.c | 391 ++ 3 files changed, 1454 insertions(+), 84 deletions(-) create mode 100644 bsd-user/bsd-file.h diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h new file mode 100644 index 000..fc279a8 --- /dev/null +++ b/bsd-user/bsd-file.h @@ -0,0 +1, @@ +/* + * file related system call shims and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#ifndef __BSD_FILE_H_ +#define __BSD_FILE_H_ + +#include sys/types.h +#include sys/mount.h +#include sys/uio.h +#include fcntl.h +#include poll.h +#include stdio.h +#include stdlib.h +#include unistd.h + +#define target_to_host_bitmask(x, tbl) (x) + +#define LOCK_PATH(p, arg) do { \ +(p) = lock_user_string(arg); \ +if ((p) == NULL) { \ +return -TARGET_EFAULT; \ +} \ +} while (0) + +#define UNLOCK_PATH(p, arg) unlock_user((p), (arg), 0) + +struct target_pollfd { +int32_t fd; /* file descriptor */ +int16_t events; /* requested events */ +int16_t revents;/* returned events */ +}; + +static abi_long lock_iovec(int type, struct iovec *vec, abi_ulong target_addr, +int count, int copy); +static abi_long unlock_iovec(struct iovec *vec, abi_ulong target_addr, +int count, int copy); +extern int __getcwd(char *path, size_t len); + +/* read(2) */ +static inline abi_long do_bsd_read(abi_long arg1, abi_long arg2, abi_long arg3) +{ +abi_long ret; +void *p; + +p = lock_user(VERIFY_WRITE, arg2, arg3, 0); +if (p == NULL) { +return -TARGET_EFAULT; +} +ret = get_errno(read(arg1, p, arg3)); +unlock_user(p, arg2, ret); + +return ret; +} + +/* pread(2) */ +static inline abi_long do_bsd_pread(void *cpu_env, abi_long arg1, +abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6) +{ +abi_long ret; +void *p; + +p = lock_user(VERIFY_WRITE, arg2, arg3, 0); +if (p == NULL) { +return -TARGET_EFAULT; +} +if (regpairs_aligned(cpu_env) != 0) { +arg4 = arg5; +arg5 = arg6; +} +ret = get_errno(pread(arg1, p, arg3, target_offset64(arg4, arg5))); +unlock_user(p, arg2, ret); + +return ret; +} + +/* readv(2) */ +static inline abi_long do_bsd_readv(abi_long arg1, abi_long arg2, abi_long arg3) +{ +abi_long ret; +int count = arg3; +struct iovec *vec; + +vec = alloca(count * sizeof(struct iovec)); +if (vec == NULL) { +return -TARGET_ENOMEM; +} +if (lock_iovec(VERIFY_WRITE, vec, arg2, count, 0) 0) { +return -TARGET_EFAULT; +} +ret = get_errno(readv(arg1, vec, count)); +unlock_iovec(vec, arg2, count, 1); + +return ret; +} + +/* write(2) */ +static inline abi_long do_bsd_write(abi_long arg1, abi_long arg2, abi_long arg3) +{ +abi_long ret; +void *p; + +p = lock_user(VERIFY_READ, arg2, arg3, 1); +if (p == NULL) { +return -TARGET_EFAULT; +} +ret = get_errno(write(arg1, p, arg3)); +unlock_user(p, arg2, 0); + +return ret; +} + +/* pwrite(2) */ +static inline abi_long do_bsd_pwrite(void *cpu_env, abi_long arg1, +abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5,
[Qemu-devel] [PATCH v2 00/19] bsd-user: Add system call and mips/arm support.
[v2] - Rebases to 1.7.0-rc0. (Requires, however, Andreas Tobler's patch to build: see http://lists.nongnu.org/archive/html/qemu-devel/2013-11/msg0.html) - Fixes deadlock in the _umtx_op() system call handler. - Fixes race condition in mmap() system call handler. - Makes qemu-mips (o32) usable. - A small code clean up to the ARM cpu_loop(). - Fixes comment in arm-bsd-user.mak to match filename. - Fixes symbol conflicts with FreeBSD's libcrypto for static link. [v1] This patch series adds a significant number of system calls and mips/arm support for bsd-user. In its current state it can emulate most FreeBSD mips/mips64 and arm target binaries on a x86 host in a simple chroot environment. (see https://wiki.freebsd.org/QemuUserModeHowTo for the details.) Besides adding a lot of shims and other support code this change restructures the code significantly to reduce the amount of C preprocessor conditionals for the various target and host arch/OS's. In general, the target cpu depedent code has been moved into into the various arch directories and the host OS dependent code (ie. FreeBSD, NetBSD, OpenBSD) has been moved into the OS directories as much as possible. I would like to recognize Olivier Houchard for a lot of the arm dependent code and Juergen Lock, the maintainer of the FreeBSD Qemu port, for their contributions. Best Regards, Stacey D. Son --- Stacey Son (19): bsd-user: refresh freebsd system call numbers bsd-user: add HOST_ABI_DIR for the various *BSD dependent code. bsd-user: move OS/arch dependent code for strace into separate directories bsd-user: move target arch and host OS dependent code out of main.c bsd-user: move target arch and host OS dependent code out of syscall.c bsd-user: add support for freebsd time related system calls bsd-user: add support for freebsd signal related system calls bsd-user: move target arch and host OS dependent code out of elfload.c bsd-user: add support for freebsd process related system calls bsd-user: add support for file system related system calls bsd-user: add support for stat, directory, and file control related system calls bsd-user: add support for memory management related system calls bsd-user: add support for socket related system calls bsd-user: add support for thread related system calls bsd-user: add support for the ioctl system call bsd-user: add support for extended attribute and ACL related syscalls bsd-user: add support for miscellaneous system calls bsd-user: add arm, mips and mips64 options to configure target-list bsd-user: fix linking conflicts with FreeBSD libcrypto Makefile.target |5 +- bsd-user/Makefile.objs |6 +- bsd-user/arm/syscall.h | 36 + bsd-user/arm/target_arch.h | 10 + bsd-user/arm/target_arch_cpu.c | 27 + bsd-user/arm/target_arch_cpu.h | 375 ++ bsd-user/arm/target_arch_elf.h | 54 + bsd-user/arm/target_arch_signal.h | 257 + bsd-user/arm/target_arch_sigtramp.h | 33 + bsd-user/arm/target_arch_sysarch.h | 78 ++ bsd-user/arm/target_arch_thread.h | 67 ++ bsd-user/arm/target_arch_vmparam.h | 51 + bsd-user/bsd-file.h | ++ bsd-user/bsd-ioctl.c| 448 bsd-user/bsd-ioctl.h| 27 + bsd-user/bsd-mem.c | 122 ++ bsd-user/bsd-mem.h | 393 +++ bsd-user/bsd-misc.c | 209 bsd-user/bsd-misc.h | 339 ++ bsd-user/bsd-proc.c | 160 +++ bsd-user/bsd-proc.h | 434 +++ bsd-user/bsd-signal.h | 232 bsd-user/bsd-socket.c | 108 ++ bsd-user/bsd-socket.h | 266 + bsd-user/bsdload.c | 170 ++- bsd-user/elfload.c | 956 - bsd-user/errno_defs.h | 13 +- bsd-user/freebsd/host_os.h | 46 + bsd-user/freebsd/os-extattr.c | 119 ++ bsd-user/freebsd/os-extattr.h | 644 +++ bsd-user/freebsd/os-ioctl-cmds.h| 47 + bsd-user/freebsd/os-ioctl-filio.h | 45 + bsd-user/freebsd/os-ioctl-ioccom.h | 54 + bsd-user/freebsd/os-ioctl-ttycom.h | 257 + bsd-user/freebsd/os-ioctl-types.h |7 + bsd-user/freebsd/os-misc.h | 442 bsd-user/freebsd/os-proc.c | 234 bsd-user/freebsd/os-proc.h | 428 +++ bsd-user/freebsd/os-signal.h| 43 + bsd-user/freebsd/os-socket.c| 149 +++ bsd-user/freebsd/os-socket.h| 548 + bsd-user/freebsd/os-stat.c | 234 bsd-user/freebsd/os-stat.h | 437 +++ bsd-user/freebsd/os-strace.h| 29 +
[Qemu-devel] [PATCH v2 11/19] bsd-user: add support for stat, directory, and file control related system calls
This change adds support or stubs for stat, directory, and file control related system calls including stat(2), lstat(2), fstat(2), fstatat(2), nstat(), nfstat(), nlstat(), getfh(2), lgetfh(2), fhopen(2), fhstat(2), fhstatfs(2), statfs(2), fstatfs(2), getfsstat(2), getdents(2), getdirentries(2), and fcntl(2). Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/Makefile.objs |1 + bsd-user/freebsd/os-stat.c | 234 +++ bsd-user/freebsd/os-stat.h | 437 bsd-user/freebsd/qemu-os.h |8 + bsd-user/netbsd/os-stat.c |1 + bsd-user/netbsd/os-stat.h |1 + bsd-user/openbsd/os-stat.c |1 + bsd-user/openbsd/os-stat.h | 176 ++ bsd-user/syscall.c | 76 9 files changed, 935 insertions(+), 0 deletions(-) create mode 100644 bsd-user/freebsd/os-stat.c create mode 100644 bsd-user/freebsd/os-stat.h create mode 100644 bsd-user/netbsd/os-stat.c create mode 100644 bsd-user/netbsd/os-stat.h create mode 100644 bsd-user/openbsd/os-stat.c create mode 100644 bsd-user/openbsd/os-stat.h diff --git a/bsd-user/Makefile.objs b/bsd-user/Makefile.objs index 6a2fc37..ee70866 100644 --- a/bsd-user/Makefile.objs +++ b/bsd-user/Makefile.objs @@ -1,5 +1,6 @@ obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \ uaccess.o bsd-proc.o \ $(HOST_ABI_DIR)/os-proc.o \ + $(HOST_ABI_DIR)/os-stat.o \ $(HOST_ABI_DIR)/os-sys.o \ $(HOST_ABI_DIR)/os-time.o $(TARGET_ABI_DIR)/target_arch_cpu.o diff --git a/bsd-user/freebsd/os-stat.c b/bsd-user/freebsd/os-stat.c new file mode 100644 index 000..50885d1 --- /dev/null +++ b/bsd-user/freebsd/os-stat.c @@ -0,0 +1,234 @@ +/* + * FreeBSD stat related conversion routines + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#include sys/types.h +#include sys/stat.h +#include sys/mount.h + +#include qemu.h +#include qemu-os.h + +/* + * stat conversion + */ +abi_long h2t_freebsd_stat(abi_ulong target_addr, struct stat *host_st) +{ +struct target_freebsd_stat *target_st; + +if (!lock_user_struct(VERIFY_WRITE, target_st, target_addr, 0)) { +return -TARGET_EFAULT; +} +memset(target_st, 0, sizeof(*target_st)); +__put_user(host_st-st_dev, target_st-st_dev); +__put_user(host_st-st_ino, target_st-st_ino); +__put_user(host_st-st_mode, target_st-st_mode); +__put_user(host_st-st_nlink, target_st-st_nlink); +__put_user(host_st-st_uid, target_st-st_uid); +__put_user(host_st-st_gid, target_st-st_gid); +__put_user(host_st-st_rdev, target_st-st_rdev); +__put_user(host_st-st_atim.tv_sec, target_st-st_atim.tv_sec); +__put_user(host_st-st_atim.tv_nsec, target_st-st_atim.tv_nsec); +__put_user(host_st-st_mtim.tv_sec, target_st-st_mtim.tv_sec); +__put_user(host_st-st_mtim.tv_nsec, target_st-st_mtim.tv_nsec); +__put_user(host_st-st_ctim.tv_sec, target_st-st_ctim.tv_sec); +__put_user(host_st-st_ctim.tv_nsec, target_st-st_ctim.tv_nsec); +__put_user(host_st-st_size, target_st-st_size); +__put_user(host_st-st_blocks, target_st-st_blocks); +__put_user(host_st-st_blksize, target_st-st_blksize); +__put_user(host_st-st_flags, target_st-st_flags); +__put_user(host_st-st_gen, target_st-st_gen); +/* st_lspare not used */ +__put_user(host_st-st_birthtim.tv_sec, target_st-st_birthtim.tv_sec); +__put_user(host_st-st_birthtim.tv_nsec, target_st-st_birthtim.tv_nsec); +unlock_user_struct(target_st, target_addr, 1); + +return 0; +} + +abi_long h2t_freebsd_nstat(abi_ulong target_addr, struct stat *host_st) +{ +struct target_freebsd_nstat *target_st; + +if (!lock_user_struct(VERIFY_WRITE, target_st, target_addr, 0)) { +return -TARGET_EFAULT; +} +memset(target_st, 0, sizeof(*target_st)); +__put_user(host_st-st_dev, target_st-st_dev); +__put_user(host_st-st_ino, target_st-st_ino); +__put_user(host_st-st_mode, target_st-st_mode); +__put_user(host_st-st_nlink, target_st-st_nlink); +__put_user(host_st-st_uid, target_st-st_uid); +__put_user(host_st-st_gid, target_st-st_gid); +__put_user(host_st-st_rdev, target_st-st_rdev); +__put_user(host_st-st_atim.tv_sec,
[Qemu-devel] [PATCH v2 01/19] bsd-user: refresh freebsd system call numbers
Update FreeBSD system call numbers in freebsd/syscall_nr.h. Reviewed-by: Ed Maste ema...@freebsd.org Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/freebsd/syscall_nr.h | 813 ++--- 1 files changed, 445 insertions(+), 368 deletions(-) diff --git a/bsd-user/freebsd/syscall_nr.h b/bsd-user/freebsd/syscall_nr.h index 36336ab..d849024 100644 --- a/bsd-user/freebsd/syscall_nr.h +++ b/bsd-user/freebsd/syscall_nr.h @@ -1,373 +1,450 @@ /* * System call numbers. * - * $FreeBSD: src/sys/sys/syscall.h,v 1.224 2008/08/24 21:23:08 rwatson Exp $ - * created from FreeBSD: head/sys/kern/syscalls.master 182123 2008-08-24 21:20:35Z rwatson + * created from FreeBSD: releng/9.1/sys/kern/syscalls.master 229723 + * 2012-01-06 19:29:16Z jhb */ -#define TARGET_FREEBSD_NR_syscall 0 -#define TARGET_FREEBSD_NR_exit1 -#define TARGET_FREEBSD_NR_fork2 -#define TARGET_FREEBSD_NR_read3 -#define TARGET_FREEBSD_NR_write 4 -#define TARGET_FREEBSD_NR_open5 -#define TARGET_FREEBSD_NR_close 6 -#define TARGET_FREEBSD_NR_wait4 7 -#define TARGET_FREEBSD_NR_link9 -#define TARGET_FREEBSD_NR_unlink 10 -#define TARGET_FREEBSD_NR_chdir 12 -#define TARGET_FREEBSD_NR_fchdir 13 -#define TARGET_FREEBSD_NR_mknod 14 -#define TARGET_FREEBSD_NR_chmod 15 -#define TARGET_FREEBSD_NR_chown 16 -#define TARGET_FREEBSD_NR_break 17 -#define TARGET_FREEBSD_NR_freebsd4_getfsstat 18 -#define TARGET_FREEBSD_NR_getpid 20 -#define TARGET_FREEBSD_NR_mount 21 -#define TARGET_FREEBSD_NR_unmount 22 -#define TARGET_FREEBSD_NR_setuid 23 -#define TARGET_FREEBSD_NR_getuid 24 -#define TARGET_FREEBSD_NR_geteuid 25 -#define TARGET_FREEBSD_NR_ptrace 26 -#define TARGET_FREEBSD_NR_recvmsg 27 -#define TARGET_FREEBSD_NR_sendmsg 28 -#define TARGET_FREEBSD_NR_recvfrom29 -#define TARGET_FREEBSD_NR_accept 30 -#define TARGET_FREEBSD_NR_getpeername 31 -#define TARGET_FREEBSD_NR_getsockname 32 -#define TARGET_FREEBSD_NR_access 33 -#define TARGET_FREEBSD_NR_chflags 34 -#define TARGET_FREEBSD_NR_fchflags35 -#define TARGET_FREEBSD_NR_sync36 -#define TARGET_FREEBSD_NR_kill37 -#define TARGET_FREEBSD_NR_getppid 39 -#define TARGET_FREEBSD_NR_dup 41 -#define TARGET_FREEBSD_NR_pipe42 -#define TARGET_FREEBSD_NR_getegid 43 -#define TARGET_FREEBSD_NR_profil 44 -#define TARGET_FREEBSD_NR_ktrace 45 -#define TARGET_FREEBSD_NR_getgid 47 -#define TARGET_FREEBSD_NR_getlogin49 -#define TARGET_FREEBSD_NR_setlogin50 -#define TARGET_FREEBSD_NR_acct51 -#define TARGET_FREEBSD_NR_sigaltstack 53 -#define TARGET_FREEBSD_NR_ioctl 54 -#define TARGET_FREEBSD_NR_reboot 55 -#define TARGET_FREEBSD_NR_revoke 56 -#define TARGET_FREEBSD_NR_symlink 57 -#define TARGET_FREEBSD_NR_readlink58 -#define TARGET_FREEBSD_NR_execve 59 -#define TARGET_FREEBSD_NR_umask 60 -#define TARGET_FREEBSD_NR_chroot 61 -#define TARGET_FREEBSD_NR_msync 65 -#define TARGET_FREEBSD_NR_vfork 66 -#define TARGET_FREEBSD_NR_sbrk69 -#define TARGET_FREEBSD_NR_sstk70 -#define TARGET_FREEBSD_NR_vadvise 72 -#define TARGET_FREEBSD_NR_munmap 73 -#define TARGET_FREEBSD_NR_mprotect74 -#define TARGET_FREEBSD_NR_madvise 75 -#define TARGET_FREEBSD_NR_mincore 78 -#define TARGET_FREEBSD_NR_getgroups 79 -#define TARGET_FREEBSD_NR_setgroups 80 -#define TARGET_FREEBSD_NR_getpgrp 81 -#define TARGET_FREEBSD_NR_setpgid 82 -#define TARGET_FREEBSD_NR_setitimer 83 -#define TARGET_FREEBSD_NR_swapon 85 -#define TARGET_FREEBSD_NR_getitimer 86 -#define TARGET_FREEBSD_NR_getdtablesize 89 -#define TARGET_FREEBSD_NR_dup290 -#define TARGET_FREEBSD_NR_fcntl 92 -#define TARGET_FREEBSD_NR_select 93 -#define TARGET_FREEBSD_NR_fsync 95 -#define TARGET_FREEBSD_NR_setpriority 96 -#define TARGET_FREEBSD_NR_socket 97 -#define TARGET_FREEBSD_NR_connect 98 -#define TARGET_FREEBSD_NR_getpriority 100 -#define TARGET_FREEBSD_NR_bind104 -#define TARGET_FREEBSD_NR_setsockopt 105 -#define TARGET_FREEBSD_NR_listen 106 -#define TARGET_FREEBSD_NR_gettimeofday116 -#define TARGET_FREEBSD_NR_getrusage 117 -#define TARGET_FREEBSD_NR_getsockopt 118 -#define TARGET_FREEBSD_NR_readv 120 -#define TARGET_FREEBSD_NR_writev 121 -#define TARGET_FREEBSD_NR_settimeofday122 -#define TARGET_FREEBSD_NR_fchown 123 -#define TARGET_FREEBSD_NR_fchmod 124 -#define TARGET_FREEBSD_NR_setreuid126 -#define TARGET_FREEBSD_NR_setregid127 -#define TARGET_FREEBSD_NR_rename 128 -#define TARGET_FREEBSD_NR_flock 131 -#define TARGET_FREEBSD_NR_mkfifo 132 -#define TARGET_FREEBSD_NR_sendto 133 -#define TARGET_FREEBSD_NR_shutdown134 -#define TARGET_FREEBSD_NR_socketpair 135 -#define TARGET_FREEBSD_NR_mkdir 136 -#define
[Qemu-devel] [PATCH v2 13/19] bsd-user: add support for socket related system calls
This change adds support or stubs for socket related system calls including accept(2), bind(2), connect(2), getpeername(2), getsockname(2), getsockopt(2), setsockopt(2), listen(2), recvfrom(2), recvmsg(2), sendmsg(2), sendto(2), socket(2), socketpair(2), shutdown(2), setfib(2), sctp_peeloff(2), sctp_generic_sendmsg(2), sctp_generic_recvmsg(2), sendfile(2), and freebsd4_sendfile(2). Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/Makefile.objs |4 +- bsd-user/bsd-socket.c| 108 + bsd-user/bsd-socket.h| 266 bsd-user/freebsd/os-socket.c | 149 bsd-user/freebsd/os-socket.h | 548 ++ bsd-user/freebsd/qemu-os.h |6 + bsd-user/netbsd/os-socket.c |1 + bsd-user/netbsd/os-socket.h | 98 bsd-user/openbsd/os-socket.c |1 + bsd-user/openbsd/os-socket.h | 98 bsd-user/qemu-bsd.h |8 + bsd-user/syscall.c | 93 +++ 12 files changed, 1378 insertions(+), 2 deletions(-) create mode 100644 bsd-user/bsd-socket.c create mode 100644 bsd-user/bsd-socket.h create mode 100644 bsd-user/freebsd/os-socket.c create mode 100644 bsd-user/freebsd/os-socket.h create mode 100644 bsd-user/netbsd/os-socket.c create mode 100644 bsd-user/netbsd/os-socket.h create mode 100644 bsd-user/openbsd/os-socket.c create mode 100644 bsd-user/openbsd/os-socket.h diff --git a/bsd-user/Makefile.objs b/bsd-user/Makefile.objs index 1a33a6d..9869837 100644 --- a/bsd-user/Makefile.objs +++ b/bsd-user/Makefile.objs @@ -1,6 +1,6 @@ obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \ - uaccess.o bsd-mem.o bsd-proc.o \ + uaccess.o bsd-mem.o bsd-proc.o bsd-socket.o \ $(HOST_ABI_DIR)/os-proc.o \ - $(HOST_ABI_DIR)/os-stat.o \ + $(HOST_ABI_DIR)/os-socket.o $(HOST_ABI_DIR)/os-stat.o \ $(HOST_ABI_DIR)/os-sys.o \ $(HOST_ABI_DIR)/os-time.o $(TARGET_ABI_DIR)/target_arch_cpu.o diff --git a/bsd-user/bsd-socket.c b/bsd-user/bsd-socket.c new file mode 100644 index 000..c1a3b49 --- /dev/null +++ b/bsd-user/bsd-socket.c @@ -0,0 +1,108 @@ +/* + * BSD socket system call related helpers + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#include sys/types.h +#include sys/socket.h +#include sys/un.h +#include netinet/in.h + +#include qemu.h +#include qemu-bsd.h + +/* + * socket conversion + */ +abi_long target_to_host_sockaddr(struct sockaddr *addr, abi_ulong target_addr, +socklen_t len) +{ +const socklen_t unix_maxlen = sizeof(struct sockaddr_un); +sa_family_t sa_family; +struct target_sockaddr *target_saddr; + +target_saddr = lock_user(VERIFY_READ, target_addr, len, 1); +if (target_saddr == 0) { +return -TARGET_EFAULT; +} + +sa_family = target_saddr-sa_family; + +/* + * Oops. The caller might send a incomplete sun_path; sun_path + * must be terminated by \0 (see the manual page), but unfortunately + * it is quite common to specify sockaddr_un length as + * strlen(x-sun_path) while it should be strlen(...) + 1. We will + * fix that here if needed. + */ +if (target_saddr-sa_family == AF_UNIX) { +if (len unix_maxlen len 0) { +char *cp = (char *)target_saddr; + +if (cp[len-1] !cp[len]) { +len++; +} +} +if (len unix_maxlen) { +len = unix_maxlen; +} +} + +memcpy(addr, target_saddr, len); +addr-sa_family = sa_family;/* type uint8_t */ +addr-sa_len = target_saddr-sa_len;/* type uint8_t */ +unlock_user(target_saddr, target_addr, 0); + +return 0; +} + +abi_long host_to_target_sockaddr(abi_ulong target_addr, struct sockaddr *addr, +socklen_t len) +{ +struct target_sockaddr *target_saddr; + +target_saddr = lock_user(VERIFY_WRITE, target_addr, len, 0); +if (target_saddr == 0) { +return -TARGET_EFAULT; +} +memcpy(target_saddr, addr, len); +target_saddr-sa_family = addr-sa_family; /* type uint8_t */ +target_saddr-sa_len = addr-sa_len;/* type uint8_t */ +unlock_user(target_saddr, target_addr, len); + +return
Re: [Qemu-devel] audit needed for signal handlers
Il 11/11/2013 17:56, Anthony Liguori ha scritto: On Mon, Nov 11, 2013 at 8:50 AM, Eric Blake ebl...@redhat.com wrote: Quick - identify the bug in this code (from ui/curses.c): static void curses_winch_handler(int signum) { struct winsize { unsigned short ws_row; unsigned short ws_col; unsigned short ws_xpixel; /* unused */ unsigned short ws_ypixel; /* unused */ } ws; /* terminal size changed */ if (ioctl(1, TIOCGWINSZ, ws) == -1) return; resize_term(ws.ws_row, ws.ws_col); curses_calc_pad(); invalidate = 1; /* some systems require this */ signal(SIGWINCH, curses_winch_handler); } Here's a hint: ioctl() can clobber errno. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. Isn't this precisely why EINTR exists? No. do { rc = read(...); } while (rc == -1 errno == EINTR); /* signal handler runs here */ if (errno == EAGAIN) { ... } That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? Paolo
[Qemu-devel] [PATCH v2 03/19] bsd-user: move OS/arch dependent code for strace into separate directories
This change moves host OS and arch dependent code for the sysarch system call related to the -strace functionality into the appropriate HOST_ABI_DIR and TARGET_ABI_DIR directories. Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/arm/syscall.h | 36 +++ bsd-user/arm/target_arch_sysarch.h | 78 ++ bsd-user/freebsd/os-strace.h | 29 + bsd-user/freebsd/strace.list | 76 +-- bsd-user/i386/syscall.h| 23 bsd-user/i386/target_arch_sysarch.h| 78 ++ bsd-user/mips/syscall.h| 52 ++ bsd-user/mips/target_arch_sysarch.h| 69 + bsd-user/mips64/syscall.h | 53 ++ bsd-user/mips64/target_arch_sysarch.h | 69 + bsd-user/netbsd/os-strace.h|1 + bsd-user/openbsd/os-strace.h |1 + bsd-user/qemu.h| 26 + bsd-user/sparc/syscall.h | 29 +- bsd-user/sparc/target_arch_sysarch.h | 52 ++ bsd-user/sparc64/syscall.h | 28 +- bsd-user/sparc64/target_arch_sysarch.h | 52 ++ bsd-user/strace.c | 175 +-- bsd-user/x86_64/syscall.h | 26 +- bsd-user/x86_64/target_arch_sysarch.h | 76 ++ 20 files changed, 962 insertions(+), 67 deletions(-) create mode 100644 bsd-user/arm/syscall.h create mode 100644 bsd-user/arm/target_arch_sysarch.h create mode 100644 bsd-user/freebsd/os-strace.h create mode 100644 bsd-user/i386/target_arch_sysarch.h create mode 100644 bsd-user/mips/syscall.h create mode 100644 bsd-user/mips/target_arch_sysarch.h create mode 100644 bsd-user/mips64/syscall.h create mode 100644 bsd-user/mips64/target_arch_sysarch.h create mode 100644 bsd-user/netbsd/os-strace.h create mode 100644 bsd-user/openbsd/os-strace.h create mode 100644 bsd-user/sparc/target_arch_sysarch.h create mode 100644 bsd-user/sparc64/target_arch_sysarch.h create mode 100644 bsd-user/x86_64/target_arch_sysarch.h diff --git a/bsd-user/arm/syscall.h b/bsd-user/arm/syscall.h new file mode 100644 index 000..bc3d6e6 --- /dev/null +++ b/bsd-user/arm/syscall.h @@ -0,0 +1,36 @@ +#ifndef __ARCH_SYSCALL_H_ +#define __ARCH_SYSCALL_H_ + +struct target_pt_regs { +abi_long uregs[17]; +}; + +#define ARM_cpsruregs[16] +#define ARM_pc uregs[15] +#define ARM_lr uregs[14] +#define ARM_sp uregs[13] +#define ARM_ip uregs[12] +#define ARM_fp uregs[11] +#define ARM_r10 uregs[10] +#define ARM_r9 uregs[9] +#define ARM_r8 uregs[8] +#define ARM_r7 uregs[7] +#define ARM_r6 uregs[6] +#define ARM_r5 uregs[5] +#define ARM_r4 uregs[4] +#define ARM_r3 uregs[3] +#define ARM_r2 uregs[2] +#define ARM_r1 uregs[1] +#define ARM_r0 uregs[0] + +#define ARM_SYSCALL_BASE0 /* XXX: FreeBSD only */ + +#define TARGET_FREEBSD_ARM_SYNC_ICACHE 0 +#define TARGET_FREEBSD_ARM_DRAIN_WRITEBUF 1 +#define TARGET_FREEBSD_ARM_SET_TP 2 +#define TARGET_FREEBSD_ARM_GET_TP 3 + +#define TARGET_HW_MACHINE arm +#define TARGET_HW_MACHINE_ARCH armv6 + +#endif /* !__ARCH_SYSCALL_H_ */ diff --git a/bsd-user/arm/target_arch_sysarch.h b/bsd-user/arm/target_arch_sysarch.h new file mode 100644 index 000..96d617a --- /dev/null +++ b/bsd-user/arm/target_arch_sysarch.h @@ -0,0 +1,78 @@ +/* + * arm sysarch() system call emulation + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#ifndef __ARCH_SYSARCH_H_ +#define __ARCH_SYSARCH_H_ + +#include syscall.h +#include target_arch.h + +static inline abi_long do_freebsd_arch_sysarch(CPUARMState *env, int op, +abi_ulong parms) +{ +int ret = 0; + +switch (op) { +case TARGET_FREEBSD_ARM_SYNC_ICACHE: +case TARGET_FREEBSD_ARM_DRAIN_WRITEBUF: +break; + +case TARGET_FREEBSD_ARM_SET_TP: +target_cpu_set_tls(env, parms); +break; + +case TARGET_FREEBSD_ARM_GET_TP: +ret = target_cpu_get_tls(env); +break; + +default: +ret = -TARGET_EINVAL; +break; +} +return ret; +} + +static inline void do_freebsd_arch_print_sysarch( +const struct syscallname *name, abi_long arg1, abi_long arg2, +abi_long
[Qemu-devel] [PATCH v2 16/19] bsd-user: add support for extended attribute and ACL related syscalls
This change add support for extended attribute and Access Control List (ACL) related system calls including extattrctl(), extattr_set_file(2), extattr_delete_file(2), extattr_set_fd(2), extattr_get_fd(2), extattr_delete_fd(2), extattr_get_link(2), extattr_set_link(2), extattr_delete_link(2), extattr_list_fd(2), extattr_list_file(2), extattr_list_link(2), __acl_aclcheck_fd(), __acl_aclcheck_file(), __acl_aclcheck_link(), __acl_delete_fd(), __acl_delete_file(), __acl_delete_link(), __acl_get_fd(), __acl_get_file(), __acl_get_link(), __acl_get_fd(), __acl_set_file(), and __acl_set_link(). Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/Makefile.objs|2 +- bsd-user/freebsd/os-extattr.c | 119 bsd-user/freebsd/os-extattr.h | 644 + bsd-user/freebsd/qemu-os.h|6 + bsd-user/netbsd/os-extattr.h | 247 bsd-user/openbsd/os-extattr.h | 247 bsd-user/syscall.c| 104 +++ 7 files changed, 1368 insertions(+), 1 deletions(-) create mode 100644 bsd-user/freebsd/os-extattr.c create mode 100644 bsd-user/freebsd/os-extattr.h create mode 100644 bsd-user/netbsd/os-extattr.h create mode 100644 bsd-user/openbsd/os-extattr.h diff --git a/bsd-user/Makefile.objs b/bsd-user/Makefile.objs index 242e6f4..b9eaf2d 100644 --- a/bsd-user/Makefile.objs +++ b/bsd-user/Makefile.objs @@ -1,6 +1,6 @@ obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \ uaccess.o bsd-ioctl.o bsd-mem.o bsd-proc.o bsd-socket.o \ - $(HOST_ABI_DIR)/os-proc.o \ + $(HOST_ABI_DIR)/os-extattr.o $(HOST_ABI_DIR)/os-proc.o \ $(HOST_ABI_DIR)/os-socket.o $(HOST_ABI_DIR)/os-stat.o \ $(HOST_ABI_DIR)/os-sys.o $(HOST_ABI_DIR)/os-thread.o \ $(HOST_ABI_DIR)/os-time.o $(TARGET_ABI_DIR)/target_arch_cpu.o diff --git a/bsd-user/freebsd/os-extattr.c b/bsd-user/freebsd/os-extattr.c new file mode 100644 index 000..7a10047 --- /dev/null +++ b/bsd-user/freebsd/os-extattr.c @@ -0,0 +1,119 @@ +/* + * FreeBSD extend attributes and ACL conversions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#include sys/types.h +#ifndef _ACL_PRIVATE +#define _ACL_PRIVATE +#endif +#include sys/acl.h + +#include qemu.h +#include qemu-os.h + +/* + * FreeBSD ACL conversion. + */ +abi_long t2h_freebsd_acl(struct acl *host_acl, abi_ulong target_addr) +{ +uint32_t i; +struct target_freebsd_acl *target_acl; + +if (!lock_user_struct(VERIFY_READ, target_acl, target_addr, 1)) { +return -TARGET_EFAULT; +} +__get_user(host_acl-acl_maxcnt, target_acl-acl_maxcnt); +__get_user(host_acl-acl_cnt, target_acl-acl_cnt); + +for (i = 0; i host_acl-acl_maxcnt; i++) { +__get_user(host_acl-acl_entry[i].ae_tag, +target_acl-acl_entry[i].ae_tag); +__get_user(host_acl-acl_entry[i].ae_id, +target_acl-acl_entry[i].ae_id); +__get_user(host_acl-acl_entry[i].ae_perm, +target_acl-acl_entry[i].ae_perm); +__get_user(host_acl-acl_entry[i].ae_entry_type, +target_acl-acl_entry[i].ae_entry_type); +__get_user(host_acl-acl_entry[i].ae_flags, +target_acl-acl_entry[i].ae_flags); +} + +unlock_user_struct(target_acl, target_addr, 0); +return 0; +} + +abi_long h2t_freebsd_acl(abi_ulong target_addr, struct acl *host_acl) +{ +uint32_t i; +struct target_freebsd_acl *target_acl; + +if (!lock_user_struct(VERIFY_WRITE, target_acl, target_addr, 0)) { +return -TARGET_EFAULT; +} + +__put_user(host_acl-acl_maxcnt, target_acl-acl_maxcnt); +__put_user(host_acl-acl_cnt, target_acl-acl_cnt); + +for (i = 0; i host_acl-acl_maxcnt; i++) { +__put_user(host_acl-acl_entry[i].ae_tag, +target_acl-acl_entry[i].ae_tag); +__put_user(host_acl-acl_entry[i].ae_id, +target_acl-acl_entry[i].ae_id); +__put_user(host_acl-acl_entry[i].ae_perm, +target_acl-acl_entry[i].ae_perm); +__get_user(host_acl-acl_entry[i].ae_entry_type, +target_acl-acl_entry[i].ae_entry_type); +__get_user(host_acl-acl_entry[i].ae_flags, +target_acl-acl_entry[i].ae_flags); +} + +
[Qemu-devel] [PATCH v2 05/19] bsd-user: move target arch and host OS dependent code out of syscall.c
This change moves the system call handler for sysctl(2) and sysarch(2) from syscall.c to the OS and arch dependent directories. This eliminates many of the #ifdef's in syscall.c. These system call handlers are now located in the host os and target arch directories. Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/Makefile.objs |2 +- bsd-user/arm/target_arch_sigtramp.h | 33 bsd-user/bsdload.c | 170 ++-- bsd-user/elfload.c |9 +- bsd-user/freebsd/os-sys.c | 268 +++ bsd-user/freebsd/target_os_stack.h | 157 ++ bsd-user/i386/target_arch_sigtramp.h| 11 ++ bsd-user/mips/target_arch_sigtramp.h| 23 +++ bsd-user/mips64/target_arch_sigtramp.h | 23 +++ bsd-user/netbsd/os-sys.c| 46 ++ bsd-user/netbsd/target_os_stack.h | 33 bsd-user/openbsd/os-sys.c | 46 ++ bsd-user/openbsd/target_os_stack.h | 33 bsd-user/qemu.h | 30 +++- bsd-user/sparc/target_arch_sigtramp.h | 11 ++ bsd-user/sparc64/target_arch_sigtramp.h | 11 ++ bsd-user/syscall.c | 210 +++- bsd-user/x86_64/target_arch_sigtramp.h | 11 ++ 18 files changed, 884 insertions(+), 243 deletions(-) create mode 100644 bsd-user/arm/target_arch_sigtramp.h create mode 100644 bsd-user/freebsd/os-sys.c create mode 100644 bsd-user/freebsd/target_os_stack.h create mode 100644 bsd-user/i386/target_arch_sigtramp.h create mode 100644 bsd-user/mips/target_arch_sigtramp.h create mode 100644 bsd-user/mips64/target_arch_sigtramp.h create mode 100644 bsd-user/netbsd/os-sys.c create mode 100644 bsd-user/netbsd/target_os_stack.h create mode 100644 bsd-user/openbsd/os-sys.c create mode 100644 bsd-user/openbsd/target_os_stack.h create mode 100644 bsd-user/sparc/target_arch_sigtramp.h create mode 100644 bsd-user/sparc64/target_arch_sigtramp.h create mode 100644 bsd-user/x86_64/target_arch_sigtramp.h diff --git a/bsd-user/Makefile.objs b/bsd-user/Makefile.objs index 41e8dce..b5ed89e 100644 --- a/bsd-user/Makefile.objs +++ b/bsd-user/Makefile.objs @@ -1,2 +1,2 @@ obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \ - uaccess.o $(TARGET_ABI_DIR)/target_arch_cpu.o + uaccess.o $(HOST_ABI_DIR)/os-sys.o $(TARGET_ABI_DIR)/target_arch_cpu.o diff --git a/bsd-user/arm/target_arch_sigtramp.h b/bsd-user/arm/target_arch_sigtramp.h new file mode 100644 index 000..98dc313 --- /dev/null +++ b/bsd-user/arm/target_arch_sigtramp.h @@ -0,0 +1,33 @@ + +#ifndef _TARGET_ARCH_SIGTRAMP_H_ +#define _TARGET_ARCH_SIGTRAMP_H_ + +/* Compare to arm/arm/locore.S ENTRY_NP(sigcode) */ +static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, +unsigned sys_sigreturn) +{ +int i; +uint32_t sys_exit = TARGET_FREEBSD_NR_exit; +/* + * The code has to load r7 manually rather than using + * ldr r7, =SYS_return to make sure the size of the + * code is correct. + */ +uint32_t sigtramp_code[] = { +/* 1 */ 0xE1AD, /* mov r0, sp */ +/* 2 */ 0xE59F700C, /* ldr r7, [pc, #12] */ +/* 3 */ 0xEF00 + sys_sigreturn, /* swi (SYS_sigreturn) */ +/* 4 */ 0xE59F7008, /* ldr r7, [pc, #8] */ +/* 5 */ 0xEF00 + sys_exit, /* swi (SYS_exit)*/ +/* 6 */ 0xEAFA, /* b . -16 */ +/* 7 */ sys_sigreturn, +/* 8 */ sys_exit +}; + +for (i = 0; i 8; i++) { +tswap32s(sigtramp_code[i]); +} + +return memcpy_to_target(offset, sigtramp_code, TARGET_SZSIGCODE); +} +#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c index 2abc713..45fdcf8 100644 --- a/bsd-user/bsdload.c +++ b/bsd-user/bsdload.c @@ -1,4 +1,19 @@ -/* Code for loading BSD executables. Mostly linux kernel code. */ +/* + * Load BSD executables. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ #include sys/types.h #include sys/stat.h @@ -26,38 +41,22 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src, return 0; } -static int in_group_p(gid_t g) -{ -/* return TRUE if we're in the specified group, FALSE otherwise */ -int ngroup; -int i; -
[Qemu-devel] [PATCH v2 19/19] bsd-user: fix linking conflicts with FreeBSD libcrypto
FreeBSD has it's own AES_set_decrypt_key, etc. in libcrypto. This change fixes these conflicts and allows statically linking BSD user mode qemu. Signed-off-by: Stacey Son s...@freebsd.org --- include/qemu/aes.h |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/include/qemu/aes.h b/include/qemu/aes.h index e79c707..6d253a3 100644 --- a/include/qemu/aes.h +++ b/include/qemu/aes.h @@ -10,6 +10,15 @@ struct aes_key_st { }; typedef struct aes_key_st AES_KEY; +/* FreeBSD has it's own AES_set_decrypt_key in -lcrypto, avoid conflicts. */ +#ifdef __FreeBSD__ +#define AES_set_encrypt_key QEMU_AES_set_encrypt_key +#define AES_set_decrypt_key QEMU_AES_set_decrypt_key +#define AES_encrypt QEMU_AES_encrypt +#define AES_decrypt QEMU_AES_decrypt +#define AES_cbc_encrypt QEMU_AES_cbc_encrypt +#endif + int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); int AES_set_decrypt_key(const unsigned char *userKey, const int bits, -- 1.7.8
Re: [Qemu-devel] audit needed for signal handlers
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 Il 11/11/2013 18:08, Eric Blake ha scritto: That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? signalfd is currently a Linux-only concept - what happens on BSD? It is emulated with a thread and sigwait (see compatfd.c). Paolo -BEGIN PGP SIGNATURE- Version: GnuPG v2.0.22 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iQIcBAEBAgAGBQJSgQ+pAAoJEBvWZb6bTYbyC1YQAIleHFtvyjlVzmY5MxHKN3L2 YkCLE3s/8dRRvH1IjD61LA3RxJHFgBK6+i45bw8Am+ydRVoCLJ3/tyPqP1ygyg/q w/lhITmpe4h0w8W4omI3RHh3+Ueh3Slcobsrzc1js197Dy4eL2RLdfvtIVrbe8Qj cPz7hML+2sGv6zqL7mebdVRjxqKNMoVXyLuuFGndAc7shSGJ8XLl4Cn/b46Efl9z zjNzCxd0j2FJ3mcoINZ1ON2pv73pwqhDCaIVZFSMwOhuY09Rh1rT9tJ+pbXI1ZlU pz6ThYcnxJSTqWOqVm6GJxA0bgec0ZejZkPYALgn7dWRIL9iLpr5FmxDpg69+me3 p6ZoZbF1QI5BzndEJw97/4S9cumxkPVo1KPkJAPA3uQfta/L9l2q4PReMbHj3qTS q35wCOI9se/JkiB/9872WvXi9b6OSxTQEXFpDspglojRgd96yuWVI2aBeUtPLKJh 7fShY4JRU41cC1cp9gdK/dv1wFCs6D6U5mPnsN0t0hYboZiIwLusGrhSagWf34aK Fg829GgKhebE/YD/lupMIfqEPb3UxkK7y/ME27789eiP81j/ckuXBI++/yPh5iIx dXJx0nD8qJApjKePjJGJYRXGIB9MS/GTi2mdQltVkcr74CLaoPFe2epFhB04zmC2 k4oCTb/TZ78+Eh/oHt7p =c3gn -END PGP SIGNATURE-
[Qemu-devel] [PATCH v2 12/19] bsd-user: add support for memory management related system calls
This change adds support or stubs for memory management related system calls including mmap(2), munmap(2), mprotect(2), msync(2), mlock(2), munlock(2), mlockall(2), munlockall(2), madvise(2), minherit(2), mincore(2), shm_open(2), shm_unlink(2), shmget(2), shmctl(2), shmat(2), shmdt(2), vadvise(), sbrk(), sstk(), and freebsd6_mmap(). Signed-off-by: Stacey Son s...@freebsd.org --- bsd-user/Makefile.objs |2 +- bsd-user/bsd-mem.c | 122 bsd-user/bsd-mem.h | 393 ++ bsd-user/mmap.c| 493 ++-- bsd-user/qemu-bsd.h| 10 + bsd-user/qemu.h|3 +- bsd-user/syscall.c | 174 ++--- 7 files changed, 942 insertions(+), 255 deletions(-) create mode 100644 bsd-user/bsd-mem.c create mode 100644 bsd-user/bsd-mem.h diff --git a/bsd-user/Makefile.objs b/bsd-user/Makefile.objs index ee70866..1a33a6d 100644 --- a/bsd-user/Makefile.objs +++ b/bsd-user/Makefile.objs @@ -1,5 +1,5 @@ obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \ - uaccess.o bsd-proc.o \ + uaccess.o bsd-mem.o bsd-proc.o \ $(HOST_ABI_DIR)/os-proc.o \ $(HOST_ABI_DIR)/os-stat.o \ $(HOST_ABI_DIR)/os-sys.o \ diff --git a/bsd-user/bsd-mem.c b/bsd-user/bsd-mem.c new file mode 100644 index 000..bfe03aa --- /dev/null +++ b/bsd-user/bsd-mem.c @@ -0,0 +1,122 @@ +/* + * memory management system conversion routines + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see http://www.gnu.org/licenses/. + */ + +#include sys/ipc.h +#include sys/shm.h + +#include qemu.h +#include qemu-bsd.h + +struct bsd_shm_regions bsd_shm_regions[N_BSD_SHM_REGIONS]; + +abi_ulong bsd_target_brk; +abi_ulong bsd_target_original_brk; + +void target_set_brk(abi_ulong new_brk) +{ + +bsd_target_original_brk = bsd_target_brk = HOST_PAGE_ALIGN(new_brk); +} + +abi_long target_to_host_ipc_perm(struct ipc_perm *host_ip, +abi_ulong target_addr) +{ +struct target_ipc_perm *target_ip; + +if (!lock_user_struct(VERIFY_READ, target_ip, target_addr, 1)) { +return -TARGET_EFAULT; +} +__get_user(host_ip-cuid, target_ip-cuid); +__get_user(host_ip-cgid, target_ip-cgid); +__get_user(host_ip-uid, target_ip-uid); +__get_user(host_ip-gid, target_ip-gid); +__get_user(host_ip-mode, target_ip-mode); +__get_user(host_ip-seq, target_ip-seq); +__get_user(host_ip-key, target_ip-key); +unlock_user_struct(target_ip, target_addr, 0); + +return 0; +} + +abi_long host_to_target_ipc_perm(abi_ulong target_addr, +struct ipc_perm *host_ip) +{ +struct target_ipc_perm *target_ip; + +if (!lock_user_struct(VERIFY_WRITE, target_ip, target_addr, 0)) { +return -TARGET_EFAULT; +} +__put_user(host_ip-cuid, target_ip-cuid); +__put_user(host_ip-cgid, target_ip-cgid); +__put_user(host_ip-uid, target_ip-uid); +__put_user(host_ip-gid, target_ip-gid); +__put_user(host_ip-mode, target_ip-mode); +__put_user(host_ip-seq, target_ip-seq); +__put_user(host_ip-key, target_ip-key); +unlock_user_struct(target_ip, target_addr, 1); + +return 0; +} + +abi_long target_to_host_shmid_ds(struct shmid_ds *host_sd, +abi_ulong target_addr) +{ +struct target_shmid_ds *target_sd; + +if (!lock_user_struct(VERIFY_READ, target_sd, target_addr, 1)) { +return -TARGET_EFAULT; +} +if (target_to_host_ipc_perm((host_sd-shm_perm), target_addr)) { +return -TARGET_EFAULT; +} +__get_user(host_sd-shm_segsz, target_sd-shm_segsz); +__get_user(host_sd-shm_lpid, target_sd-shm_lpid); +__get_user(host_sd-shm_cpid, target_sd-shm_cpid); +__get_user(host_sd-shm_nattch, target_sd-shm_nattch); +__get_user(host_sd-shm_atime, target_sd-shm_atime); +__get_user(host_sd-shm_dtime, target_sd-shm_dtime); +__get_user(host_sd-shm_ctime, target_sd-shm_ctime); +unlock_user_struct(target_sd, target_addr, 0); + +return 0; +} + +abi_long host_to_target_shmid_ds(abi_ulong target_addr, +struct shmid_ds *host_sd) +{ +struct target_shmid_ds *target_sd; + +if (!lock_user_struct(VERIFY_WRITE, target_sd, target_addr, 0)) { +return -TARGET_EFAULT; +} +if
Re: [Qemu-devel] audit needed for signal handlers
On 11/11/2013 09:56 AM, Anthony Liguori wrote: Here's a hint: ioctl() can clobber errno. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. Isn't this precisely why EINTR exists? That's part of the equation, but not everything. EINTR exists for a system call that was cut short by the delivery of a signal; if you check for errno==EINTR after a call that is documented to support it (such as write() or poll()), then you know that the call was interrupted; use of SA_RESTART with sigaction() can also control whether you will even see EINTR in the first place for some functions. But consider what happens when the system call completes normally, and the signal handler then gets invoked in between the syscall completion and the later code that checks the value of errno. There, errno will NOT be EINTR, and it is vital that the signal handler not corrupt errno prior to returning control to normal execution context. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] audit needed for signal handlers
On 11 November 2013 17:05, Paolo Bonzini pbonz...@redhat.com wrote: That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? Eric specifically points out one which is not. (I'm pretty sure that 'reinstall signal handler at end of signal handler' is ancient voodoo that we don't want either, incidentally.) -- PMM
[Qemu-devel] DMA Engine Support
I am working with two DMA engines at the moment. I want to be able to provide guest access to a Crystal Beach DMA engine and a PLX 87xx DMA engine. Are there any current efforts to provide support for these DMA engines? What I have looked at so far indicates that they will need to have a QEMU driver written to provide guest access and would prefer utilize or continue any current effort if possible. Forrest
[Qemu-devel] [PATCH v2 18/19] bsd-user: add arm, mips and mips64 options to configure target-list
This change adds arm-bsd-user, mips-bsd-user, mips64-bsd-user, mips64el-bsd-user, and mipsel-bsd-user as --target-list options to configure. Signed-off-by: Stacey Son s...@freebsd.org --- default-configs/arm-bsd-user.mak |3 +++ default-configs/mips-bsd-user.mak |1 + default-configs/mips64-bsd-user.mak |1 + default-configs/mips64el-bsd-user.mak |1 + default-configs/mipsel-bsd-user.mak |1 + 5 files changed, 7 insertions(+), 0 deletions(-) create mode 100644 default-configs/arm-bsd-user.mak create mode 100644 default-configs/mips-bsd-user.mak create mode 100644 default-configs/mips64-bsd-user.mak create mode 100644 default-configs/mips64el-bsd-user.mak create mode 100644 default-configs/mipsel-bsd-user.mak diff --git a/default-configs/arm-bsd-user.mak b/default-configs/arm-bsd-user.mak new file mode 100644 index 000..869e6fb --- /dev/null +++ b/default-configs/arm-bsd-user.mak @@ -0,0 +1,3 @@ +# Default configuration for arm-bsd-user + +CONFIG_GDBSTUB_XML=y diff --git a/default-configs/mips-bsd-user.mak b/default-configs/mips-bsd-user.mak new file mode 100644 index 000..3fb129a --- /dev/null +++ b/default-configs/mips-bsd-user.mak @@ -0,0 +1 @@ +# Default configuration for mips-bsd-user diff --git a/default-configs/mips64-bsd-user.mak b/default-configs/mips64-bsd-user.mak new file mode 100644 index 000..d4e72a6 --- /dev/null +++ b/default-configs/mips64-bsd-user.mak @@ -0,0 +1 @@ +# Default configuration for mips64-bsd-user diff --git a/default-configs/mips64el-bsd-user.mak b/default-configs/mips64el-bsd-user.mak new file mode 100644 index 000..b879228 --- /dev/null +++ b/default-configs/mips64el-bsd-user.mak @@ -0,0 +1 @@ +# Default configuration for mips64el-bsd-user diff --git a/default-configs/mipsel-bsd-user.mak b/default-configs/mipsel-bsd-user.mak new file mode 100644 index 000..312b9d5 --- /dev/null +++ b/default-configs/mipsel-bsd-user.mak @@ -0,0 +1 @@ +# Default configuration for mipsel-bsd-user -- 1.7.8
Re: [Qemu-devel] audit needed for signal handlers
On 11 November 2013 16:56, Anthony Liguori anth...@codemonkey.ws wrote: On Mon, Nov 11, 2013 at 8:50 AM, Eric Blake ebl...@redhat.com wrote: Here's a hint: ioctl() can clobber errno. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. Isn't this precisely why EINTR exists? EINTR won't help you in the case like: ret = some_syscall(); [execution returns from syscall, with errno set to X] [signal happens here; handler trashes errno] if (ret 0) { use errno; } EINTR exists mostly because properly resuming syscalls was too hard for Bell Labs :-) -- PMM
Re: [Qemu-devel] [PATCH 0/2] exec: alternative fix for master abort woes
Il 11/11/2013 17:43, Michael S. Tsirkin ha scritto: On Thu, Nov 07, 2013 at 06:29:40PM +0100, Paolo Bonzini wrote: Il 07/11/2013 17:47, Michael S. Tsirkin ha scritto: That's on kvm with 52 bit address. But where I would be concerned is systems with e.g. 36 bit address space where we are doubling the cost of the lookup. E.g. try i386 and not x86_64. Tried now... P_L2_LEVELS pre-patch post-patch i386 3 6 x86_64 4 6 I timed the inl_from_qemu test of vmexit.flat with both KVM and TCG. With TCG there's indeed a visible penalty of 20 cycles for i386 and 10 for x86_64 (you can extrapolate to 30 cycles for TARGET_PHYS_ADDR_SPACE_BITS=32 targets). So how did you measure this exactly? I mention extrapolation because x86 is TARGET_PHYS_ADDR_SPACE_BITS=36, not 32. Paolo
Re: [Qemu-devel] audit needed for signal handlers
On 11/11/2013 10:13 AM, Peter Maydell wrote: On 11 November 2013 17:05, Paolo Bonzini pbonz...@redhat.com wrote: That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? Eric specifically points out one which is not. (I'm pretty sure that 'reinstall signal handler at end of signal handler' is ancient voodoo that we don't want either, incidentally.) Reinstalling the signal handler is voodoo needed only for systems where SA_RESETHAND is the default behavior of signal() (POSIX says that the older signal() is implementation-defined whether it behaves like SA_RESETHAND|SA_NODEFER [SysV] or SA_RESTART [BSD] - so it is already mandatory to use sigaction() if you don't want to be bitten by the difference in semantics; but if you can assume working sigaction(), then don't use signal() or SA_RESETHAND in the first place, and you don't need the reinstall voodoo in your handlers). -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] audit needed for signal handlers
Il 11/11/2013 18:13, Peter Maydell ha scritto: That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? Eric specifically points out one which is not. (I'm pretty sure that 'reinstall signal handler at end of signal handler' is ancient voodoo that we don't want either, incidentally.) Yeah, I was convinced it was---I still cannot find a reason why SIGWINCH needs to be handled synchronously. resize_term is definitely not signal safe; the man page reflects 10-year-old (or more) signal handling lore: While these functions are intended to be used to support a signal handler (i.e., for SIGWINCH), care should be taken to avoid invoking them in a context where malloc or realloc may have been interrupted, since it uses those functions. Calling malloc/realloc from a signal handler is taboo these days... Paolo
[Qemu-devel] [PATCH RFC 0/5] making address spaces 64 bit wide
At the moment, exec ignores high bits in each address, for efficiency. This is incorrect: devices can do full 64 bit DMA, it's only the CPU that is limited by target address space. Using full 64 bit addresses was clocked at 12% performance hit on a microbenchmark. To solve, teach pagetables to skip bits at any level and not just the lowest level. This should solve the performance problem (only one line of code changed on the data path). I'm still trying to figure out how to measure speed properly with TCG, sending this out for early feedback and flames. Michael S. Tsirkin (3): exec: relace leaf with skip exec: extend skip field to 3 bits exec: memory radix tree page level compression Paolo Bonzini (2): split definitions for exec.c and translate-all.c radix trees exec: make address spaces 64-bit wide translate-all.h | 7 exec.c | 117 +++- translate-all.c | 32 +--- 3 files changed, 117 insertions(+), 39 deletions(-) -- MST
Re: [Qemu-devel] audit needed for signal handlers
On 11/11/2013 10:05 AM, Paolo Bonzini wrote: That said, aren't all signals in QEMU (except SIG_IPI) caught with signalfd and the handlers run synchronously in the iothread? signalfd is currently a Linux-only concept - what happens on BSD? -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH RFC 5/5] exec: memory radix tree page level compression
On 11/11/2013 09:41 AM, Michael S. Tsirkin wrote: At the moment, memory radix tree is already variable width, but it can only skip the low bits of address. This is efficient if we have huge memory regions but inefficient if we are only using a tiny portion of the address space. After we have built up the map, it's a simple matter to detect configurations where a single L2 entry is valid. We can them speed up the lookup by skipping one or more levels. s/them/then/ -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
On 11 Nov 2013, at 12:43, Stefan Hajnoczi wrote: Why is it necessary to push this task down into the host? I don't understand the advantage of this approach except that maybe it works around certain misconfigurations, I/O scheduler quirks, or plain old bugs - all of which should be investigated and fixed at the source instead of adding another layer of code to mask them. I can see an argument why a guest with two very differently performing disks attached might be best served by two worker threads, particularly if one such thread was in part CPU bound (inventing this use case is left as an exercise for the reader). -- Alex Bligh
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
Il 11/11/2013 18:59, Alex Bligh ha scritto: Why is it necessary to push this task down into the host? I don't understand the advantage of this approach except that maybe it works around certain misconfigurations, I/O scheduler quirks, or plain old bugs - all of which should be investigated and fixed at the source instead of adding another layer of code to mask them. I can see an argument why a guest with two very differently performing disks attached might be best served by two worker threads, particularly if one such thread was in part CPU bound (inventing this use case is left as an exercise for the reader). In most cases you want to use aio=native anyway, and then the QEMU thread pool is entirely bypassed. Paolo
Re: [Qemu-devel] audit needed for signal handlers
On Mon, Nov 11, 2013 at 8:50 PM, Eric Blake ebl...@redhat.com wrote: Quick - identify the bug in this code (from ui/curses.c): static void curses_winch_handler(int signum) { struct winsize { unsigned short ws_row; unsigned short ws_col; unsigned short ws_xpixel; /* unused */ unsigned short ws_ypixel; /* unused */ } ws; /* terminal size changed */ if (ioctl(1, TIOCGWINSZ, ws) == -1) An unsafe function is called in a signal. See man 7 signal, section 'Async-signal-safe functions'. This should be avoided. return; resize_term(ws.ws_row, ws.ws_col); curses_calc_pad(); invalidate = 1; /* some systems require this */ signal(SIGWINCH, curses_winch_handler); } Here's a hint: ioctl() can clobber errno. I believe it cannot, at least in linux, as technically the signal handler is always called in a new thread, specifically created to only handle that signal, and errno should be thread-local. But if a signal handler is called in the middle of other code that is using errno, then the handler MUST restore the value of errno before returning, if it is to guarantee that the interrupted context won't be corrupted. More reading on the topic: https://plus.google.com/+LennartPoetteringTheOneAndOnly/posts/gHSscCJkakd I have not done a full audit of qemu's signal handlers, so much as a quick look to see if I could find violations; it was surprisingly easy to find a bad example. A signal handler that resets the signal to SIG_DFL then calls raise() is exempt from caring about errno, but any signal handler that can fall through to the end and return execution to the caller MUST ensure that errno is left unchanged, for errno to be useful in the remaining body of code. Which is why the best signal handlers tend to be the one that only flag a volatile variable that is later checked at safe points of execution, rather than trying to make complex calls from within the handler context. -- Thanks. -- Max
[Qemu-devel] [PATCH] A hexdump function that also displays UTF-8 strings contained in the dumped buffer.
This function is used by a forthcomingQemu monitor command that dumps contents of OpenFirmware Device Trees. It dumps contents of a buffer as hex in the same format as the existing function but also also appends any UTF-8 strings in human-readable format. Like the existing hexdump function, this function may be used elsewhere in Qemu, and it shares the same prototype as the existing function. In both functions, check for a NULL prefix parameter and omit printing the prefix if it is null. Here is a sample of the output of both functions with no prefix string: : 61 62 33 64 62 65 65 66 65 62 34 64 66 62 65 03 0010: 67 62 35 64 68 01 05 03 69 62 36 64 6a 01 06 03 0020: 6b 62 37 64 6c 01 07 03 6d 62 38 64 6e 01 08 03 0030: 6f 62 39 64 70 01 09 03 71 62 78 64 : 61 62 33 64 62 65 65 66 65 62 34 64 66 62 65 03 ab3dbeefeb4dfbe. 0010: 67 62 35 64 68 01 05 03 69 62 36 64 6a 01 06 03 gb5dh...ib6dj... 0020: 6b 62 37 64 6c 01 07 03 6d 62 38 64 6e 01 08 03 kb7dl...mb8dn... 0030: 6f 62 39 64 70 01 09 03 71 62 78 64ob9dp...qbxd Signed-off-by: Mike Day ncm...@ncultra.org --- include/qemu-common.h | 2 ++ util/hexdump.c| 48 +++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/include/qemu-common.h b/include/qemu-common.h index 5054836..7b8e2b9 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -435,6 +435,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end); */ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); +/* include any strings alongside the hex output */ +void qemu_hexdump_str(gchar *buf, FILE *fp, const gchar *prefix, size_t len); /* vector definitions */ #ifdef __ALTIVEC__ diff --git a/util/hexdump.c b/util/hexdump.c index 969b340..a920c81 100644 --- a/util/hexdump.c +++ b/util/hexdump.c @@ -21,7 +21,11 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) for (b = 0; b size; b++) { if ((b % 16) == 0) { -fprintf(fp, %s: %04x:, prefix, b); +if (prefix) { +fprintf(fp, %s: %04x:, prefix, b); +} else { +fprintf(fp, %04x:, b); +} } if ((b % 4) == 0) { fprintf(fp, ); @@ -35,3 +39,45 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) fprintf(fp, \n); } } + +/* print any strings along side the hex dump */ +void qemu_hexdump_str(gchar *buf, FILE *fp, const gchar *prefix, size_t len) +{ + +gchar *inp, *linep; +int i, offset = 0; +inp = linep = buf; + +do { +if (prefix) { +fprintf(fp, %s: %04x: , prefix, offset); +} else { +fprintf(fp, %04x: , offset); +} +for (i = 0; i 16 len 0; i++, len--, offset++, inp++) { +if (i !(i % 4)) { +fprintf(fp, ); +} +fprintf(fp, %02hx , *inp); +} +int j; +if (i 16) { +for (j = 16 - i; j; --j) { +fprintf(fp,); +if (j (!(j % 4))) { +fprintf(fp, ); +} +} +} +fprintf(fp, ); +for (j = 0; j i; j++) { +if (*(linep + j) 0x20 || *(linep + j) 0x7e) { +fprintf(fp, %c, '.'); +} else { +fprintf(fp, %c, *(linep + j)); +} +} +fprintf(fp, \n); +linep = inp; +} while (len); +}
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
On 11 Nov 2013, at 18:01, Paolo Bonzini wrote: Il 11/11/2013 18:59, Alex Bligh ha scritto: Why is it necessary to push this task down into the host? I don't understand the advantage of this approach except that maybe it works around certain misconfigurations, I/O scheduler quirks, or plain old bugs - all of which should be investigated and fixed at the source instead of adding another layer of code to mask them. I can see an argument why a guest with two very differently performing disks attached might be best served by two worker threads, particularly if one such thread was in part CPU bound (inventing this use case is left as an exercise for the reader). In most cases you want to use aio=native anyway, and then the QEMU thread pool is entirely bypassed. 'most cases' - really? I thought anything using either qcow2 or ceph won't support that? Also I am guessing if aio=native is used then it by definition won't be CPU bound... :-) From one of the Edinburgh presentations I had thought we were going towards everything using the thread pool (subject to appropriate rearchitecture). -- Alex Bligh
Re: [Qemu-devel] [PATCH v2 0/3] Make thread pool implementation modular
Il 11/11/2013 19:32, Alex Bligh ha scritto: On 11 Nov 2013, at 18:01, Paolo Bonzini wrote: Il 11/11/2013 18:59, Alex Bligh ha scritto: Why is it necessary to push this task down into the host? I don't understand the advantage of this approach except that maybe it works around certain misconfigurations, I/O scheduler quirks, or plain old bugs - all of which should be investigated and fixed at the source instead of adding another layer of code to mask them. I can see an argument why a guest with two very differently performing disks attached might be best served by two worker threads, particularly if one such thread was in part CPU bound (inventing this use case is left as an exercise for the reader). In most cases you want to use aio=native anyway, and then the QEMU thread pool is entirely bypassed. 'most cases' - really? I thought anything using either qcow2 or ceph won't support that? qcow2 works very well with aio=native. ceph, libiscsi, gluster, etc. will not support aio=native indeed, but then they won't use the thread pool either so I wasn't thinking about them (only files and block devices). Paolo
Re: [Qemu-devel] [RFC] target-arm: provide skeleton for a64 insn decoding
On 11/12/2013 01:13 AM, Claudio Fontana wrote: +/* C3.2 Branches, exception generating and system instructions */ +static void disas_b_exc_sys(DisasContext *s, uint32_t insn) +{ +switch (extract32(insn, 25, 7)) { +case 0x0a: case 0x4a: /* Unconditional branch (immediate) */ +disas_uncond_b_imm(s, insn); +break; Bit 25 is - for unconditional branch, so this entry should be 0x0a, 0x0b, 0x4a, 0x4b All of the other decodings look good. r~
Re: [Qemu-devel] [PATCH 1/7] usb: remove old usb-host code
Hi, On 11/11/2013 09:47 AM, Gerd Hoffmann wrote: On Fr, 2013-11-08 at 17:51 +0100, Jan Kiszka wrote: On 2013-11-08 16:39, Gerd Hoffmann wrote: Hi, OK, then here is the first issue I ran into while trying libusbx (git head, i.e. 1.0.17+: The new stack causes significant latency issues that makes it almost unusable for pass-through of USB audio devices (some headset in my case). Reverting usb-linux and disabling libusb over QEMU git head makes things work again. I'll have to stick with this for now as it is affecting my work environment. Any spontaneous ideas how to analyse or even resolve this? Try setting isobsize property to something smaller than 32 (which is the default). OK, isobsize=2 and isobufs=32 helped, possibly other combinations as well - but not just reducing isobsize or increasing isobufs. Any theory about this? How can we find better defaults? isobsize is the size of a single buffer (in MaxPacketSize units). isobufs is the number of buffers in the ring. So the total ring buffer size is MaxPacketSize * isobsize * isobufs. isobsize basically trades overhead for latency. Larger numbers reduce the overhead, smaller numbers reduce latency. isobufs should be as small as possible. Start with 4 (default). If you get overruns/underruns increase. We should probably look at the endpoint interval, then calculate how many packets we should expect within a certain time range and use that as additional factor for the buffer size. That should get the defaults closer to the actual needs of the device. hw/usb/redirect.c actually has some code for that, you could use that as a start for the host redirection code... Regards, Hans
Re: [Qemu-devel] dump-guest-memory enhancement.
On 11/11/13 04:28, Phi Debian wrote: CU82$ /usr/bin/readelf -a vmcore Program Headers: Type Offset VirtAddrPhysAddr FileSizMemSiz Flg Align NOTE 0x74 0x 0x 0x000a0 0x000a0 0 LOAD 0x000114 0x6000 0x6000 0x4000 0x4000 0 The Align fot the PT_LOAD is ZERO, then the offset is 0x114, having an Align set to TARGET_PAGE_BITS, (or at least 4Kb) would provide a chance for any debugger to do page align copy (either lseek/read, or mmap) as they trip on the core, marginal detail, but may help. I checked http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html, and now it starts to make sense to me. But first, I think you meant TARGET_PAGE_SIZE, not TARGET_PAGE_BITS, for the p_align field. So, the specs say for p_align: As ``Program Loading'' describes in this chapter of the processor supplement, loadable process segments must have congruent values for p_vaddr and p_offset, modulo the page size. This member gives the value to which the segments are aligned in memory and in the file. Values 0 and 1 mean no alignment is required. Otherwise, p_align should be a positive, integral power of 2, and p_vaddr should equal p_offset, modulo p_align. In one sentence, p_vaddr and p_offset should produce the same remainder when divided by p_align. I guess you could implement this by adding some buffer space between the headers and the meat of the file. IMHO this requirement isn't necessarily a hard one for vmcores, as we're not trying to map-and-execute these segments. As an example, a userland main(){abort();} kind of prog would produce a core file like this. CM01$ readelf -a core.2000 ... LOAD off0x1000 vaddr 0x0040 paddr 0x align 2**12 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align NOTE 0x0001d4 0x 0x 0x001d8 0x0 0 LOAD 0x001000 0x00a42000 0x 0x0 0x1b000 R E 0x1000 LOAD 0x002000 0x00a5e000 0x 0x01000 0x01000 RW 0x1000 The align for LOAD's is 0x1000 thus the file offset is 0x01000, 0x2000 etc... Understood. Seems to be consistent with the above. I guess dump-guest-memory is of a marginal use, Strongly disagree :) yet it can be usefull when kexec/kdump is broken or non existant on some new architecture (os/arch bring up). It's also important when you want to control the dumping process from the host side (including the location of the vmcore). You might want to grab a dump without a guest kernel panic too. So to answer your question, the content of the PT_LOAD is ok, only its offset is non aligned. I actually meant the question the other way around. Apparently, the offset of the PT_LOAD entry itself (ie. where it is in the file) is OK; its contents could be improved perhaps (the p_offset and p_align fields). I got to precise I obtained this vmcore by implementing the arc_arm part of the qemu dump-guest-memory, and planing to do the same for arm64, I may have mis-used the QEMU API's, but for what I can read, the align member is left non initialised after a memset(zero) of the phdr/shdr i.e it is left at zero, and I got the impression that the wayt the elf is produced, section/progs alignment was not in mind. I haven't been there at the original creation of this functionality, but I tend to agree with you. For analyzing the vmcore with gdb or crash, the alignment doesn't seem to be important, so it was probably ignored. So I guess other arch are not aligned either, I did not test that. It seems to be so, yes. I think that implementing the arch-specific bits (currently: stubs) for a new architectire, for dump-guest-memory, would be very useful. OTOH reworking the generic dump code for the alignment (non-)issue seems to be more risk than worth to me, unless the produced vmcore is actually unusable on a specific platform. I'm not opposing or trying to block it of course, it's just plain ol' don't fix it if it ain't broken and keep the complexity down for me. If I understand correctly, the benefit of getting the alignment pedantically right would be a chance for any debugger to do page align copy (either lseek/read, or mmap). Is that right? I can't imagine it would be noticeable in performance on x86_64 with crash or gdb. Is the performance significantly different on ARM? Thanks Laszlo
Re: [Qemu-devel] dump-guest-memory enhancement.
Hi Laszlo, On Mon, Nov 11, 2013 at 8:38 PM, Laszlo Ersek ler...@redhat.com wrote: But first, I think you meant TARGET_PAGE_SIZE, not TARGET_PAGE_BITS, for the p_align field. So, the specs say for p_align: As ``Program Loading'' describes in this chapter of the processor supplement, loadable process segments must have congruent values for p_vaddr and p_offset, modulo the page size. This member gives the value to which the segments are aligned in memory and in the file. Values 0 and 1 mean no alignment is required. Otherwise, p_align should be a positive, integral power of 2, and p_vaddr should equal p_offset, modulo p_align. In one sentence, p_vaddr and p_offset should produce the same remainder when divided by p_align. I guess you could implement this by adding some buffer space between the headers and the meat of the file. I said TARGET_PAGE_BITS, because as you noticed in your spec text, p_align is a power of 2, if you want a 4096 byte aligned section (or prog header) you would stick 12 in there, i.e the number of bits. IMHO this requirement isn't necessarily a hard one for vmcores, as we're not trying to map-and-execute these segments. Not map-and-execute, but map-and-read sure enough :) I think that implementing the arch-specific bits (currently: stubs) for a new architectire, for dump-guest-memory, would be very useful. OTOH reworking the generic dump code for the alignment (non-)issue seems to be more risk than worth to me, unless the produced vmcore is actually unusable on a specific platform. I'm not opposing or trying to block it of course, it's just plain ol' don't fix it if it ain't broken and keep the complexity down for me. Yes, when fixing code you may simply break it :) If I understand correctly, the benefit of getting the alignment pedantically right would be a chance for any debugger to do page align copy (either lseek/read, or mmap). Is that right? yes. I can't imagine it would be noticeable in performance on x86_64 with crash or gdb. Is the performance significantly different on ARM? When number of crashdump pages to bring in start to be 'big' having it non aligned double the number of page faults, if the panic dump path would generate non aligned gigantic dump I would voice a bit more :) For qemu, we can safely ignore all this, as cross-arch qemu is mainly for bring up, and then I guess it will never run gigantic models, so never dump gigantic dump. You right, qemu dump-guest-memory is much more than a 'dump on panic' is more like get a 'live-dump' you can snapshot at different point of time in your OS load. So in conclusion we stay the way it is, the ELF is semantically correct and debuggers can deal with it. Thanx for taking time to look at it. Cheers Phi
Re: [Qemu-devel] [PATCH] A hexdump function that also displays UTF-8 strings contained in the dumped buffer.
On Mon, Nov 11, 2013 at 10:29 AM, Mike Day ncm...@ncultra.org wrote: This function is used by a forthcomingQemu monitor command that dumps contents of OpenFirmware Device Trees. It dumps contents of a buffer as hex in the same format as the existing function but also also appends any UTF-8 strings in human-readable format. Like the existing hexdump function, this function may be used elsewhere in Qemu, and it shares the same prototype as the existing function. In both functions, check for a NULL prefix parameter and omit printing the prefix if it is null. Here is a sample of the output of both functions with no prefix string: : 61 62 33 64 62 65 65 66 65 62 34 64 66 62 65 03 0010: 67 62 35 64 68 01 05 03 69 62 36 64 6a 01 06 03 0020: 6b 62 37 64 6c 01 07 03 6d 62 38 64 6e 01 08 03 0030: 6f 62 39 64 70 01 09 03 71 62 78 64 : 61 62 33 64 62 65 65 66 65 62 34 64 66 62 65 03 ab3dbeefeb4dfbe. 0010: 67 62 35 64 68 01 05 03 69 62 36 64 6a 01 06 03 gb5dh...ib6dj... 0020: 6b 62 37 64 6c 01 07 03 6d 62 38 64 6e 01 08 03 kb7dl...mb8dn... 0030: 6f 62 39 64 70 01 09 03 71 62 78 64ob9dp...qbxd Signed-off-by: Mike Day ncm...@ncultra.org --- include/qemu-common.h | 2 ++ util/hexdump.c| 48 +++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/include/qemu-common.h b/include/qemu-common.h index 5054836..7b8e2b9 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -435,6 +435,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end); */ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); +/* include any strings alongside the hex output */ +void qemu_hexdump_str(gchar *buf, FILE *fp, const gchar *prefix, size_t len); /* vector definitions */ #ifdef __ALTIVEC__ diff --git a/util/hexdump.c b/util/hexdump.c index 969b340..a920c81 100644 --- a/util/hexdump.c +++ b/util/hexdump.c @@ -21,7 +21,11 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) for (b = 0; b size; b++) { if ((b % 16) == 0) { -fprintf(fp, %s: %04x:, prefix, b); +if (prefix) { +fprintf(fp, %s: %04x:, prefix, b); +} else { +fprintf(fp, %04x:, b); +} } if ((b % 4) == 0) { fprintf(fp, ); @@ -35,3 +39,45 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) fprintf(fp, \n); } } + +/* print any strings along side the hex dump */ +void qemu_hexdump_str(gchar *buf, FILE *fp, const gchar *prefix, size_t len) +{ + +gchar *inp, *linep; +int i, offset = 0; +inp = linep = buf; + +do { +if (prefix) { +fprintf(fp, %s: %04x: , prefix, offset); +} else { +fprintf(fp, %04x: , offset); +} +for (i = 0; i 16 len 0; i++, len--, offset++, inp++) { +if (i !(i % 4)) { +fprintf(fp, ); +} +fprintf(fp, %02hx , *inp); +} +int j; +if (i 16) { +for (j = 16 - i; j; --j) { +fprintf(fp,); +if (j (!(j % 4))) { +fprintf(fp, ); +} +} +} +fprintf(fp, ); +for (j = 0; j i; j++) { +if (*(linep + j) 0x20 || *(linep + j) 0x7e) { You can use qemu_isprint() for this. +fprintf(fp, %c, '.'); +} else { +fprintf(fp, %c, *(linep + j)); Even though the comment says UTF-8, this isn't actually handling UTF-8. Just ascii. You should fold this into whatever forthcoming patch you are submitting. Regards, Anthony Liguori +} +} +fprintf(fp, \n); +linep = inp; +} while (len); +}