[Qemu-devel] [PATCH v9 05/11] block: Add bdrv_set_backing_hd()
This is the common but non-trivial steps to assign or change the backing_hd of BDS. Signed-off-by: Fam Zheng --- block.c | 34 -- include/block/block.h | 1 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index b122154..ff25749 100644 --- a/block.c +++ b/block.c @@ -958,6 +958,29 @@ fail: return ret; } +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +{ +if (bs->backing_hd) { +bdrv_unref(bs->backing_hd); +} + +bs->backing_hd = backing_hd; +if (!backing_hd) { +bs->backing_file[0] = '\0'; +bs->backing_format[0] = '\0'; +return; +} +pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_file), +backing_hd->drv ? backing_hd->drv->format_name : ""); +bdrv_ref(bs->backing_hd); + +pstrcpy(bs->backing_file, sizeof(bs->backing_file), +bs->backing_hd->file->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_format), +bs->backing_hd->drv->format_name); +} + /* * Opens the backing file for a BlockDriverState if not yet open * @@ -971,6 +994,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) char backing_filename[PATH_MAX]; int back_flags, ret; BlockDriver *back_drv = NULL; +BlockDriverState *backing_hd; Error *local_err = NULL; if (bs->backing_hd != NULL) { @@ -994,7 +1018,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) sizeof(backing_filename)); } -bs->backing_hd = bdrv_new(""); +backing_hd = bdrv_new(""); if (bs->backing_format[0] != '\0') { back_drv = bdrv_find_format(bs->backing_format); @@ -1004,20 +1028,18 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ); -ret = bdrv_open(bs->backing_hd, +ret = bdrv_open(backing_hd, *backing_filename ? backing_filename : NULL, options, back_flags, back_drv, &local_err); if (ret < 0) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_unref(backing_hd); bs->open_flags |= BDRV_O_NO_BACKING; error_setg(errp, "Could not open backing file: %s", error_get_pretty(local_err)); error_free(local_err); return ret; } -pstrcpy(bs->backing_file, sizeof(bs->backing_file), -bs->backing_hd->file->filename); +bdrv_set_backing_hd(bs, backing_hd); return 0; } diff --git a/include/block/block.h b/include/block/block.h index ac8976e..20bfcd9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -185,6 +185,7 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, QDict *options, int flags, Error **errp); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, int flags, BlockDriver *drv, Error **errp); -- 1.8.5.1
[Qemu-devel] [PATCH v9 06/11] block: Add backing_blocker in BlockDriverState
This makes use of op_blocker and blocks all the operations except for commit target, on each BlockDriverState->backing_hd. Signed-off-by: Fam Zheng --- block.c | 16 ++-- include/block/block_int.h | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index ff25749..907c483 100644 --- a/block.c +++ b/block.c @@ -961,13 +961,22 @@ fail: void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { if (bs->backing_hd) { +assert(error_is_set(&bs->backing_blocker)); +bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); bdrv_unref(bs->backing_hd); +} else if (backing_hd) { +error_setg(&bs->backing_blocker, + "device is used as backing hd of '%s'", + bs->device_name); } bs->backing_hd = backing_hd; if (!backing_hd) { bs->backing_file[0] = '\0'; bs->backing_format[0] = '\0'; +if (error_is_set(&bs->backing_blocker)) { +error_free(bs->backing_blocker); +} return; } pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); @@ -975,6 +984,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) backing_hd->drv ? backing_hd->drv->format_name : ""); bdrv_ref(bs->backing_hd); +bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); +/* Otherwise we won't be able to commit due to check in bdrv_commit */ +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1481,8 +1494,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_set_backing_hd(bs, NULL); } bs->drv->bdrv_close(bs); g_free(bs->opaque); diff --git a/include/block/block_int.h b/include/block/block_int.h index 2f6556d..1ac17d5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -341,6 +341,9 @@ struct BlockDriverState { BlockJob *job; QDict *options; + +/* The error object in use for blocking operations on backing_hd */ +Error *backing_blocker; }; int get_tmp_filename(char *filename, int size); -- 1.8.5.1
[Qemu-devel] [PATCH v9 08/11] block: Support dropping active in bdrv_drop_intermediate
Dropping intermediate could be useful both for commit and stream, and BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs to work with op blockers. Signed-off-by: Fam Zheng --- block.c| 145 ++--- block/commit.c | 1 + 2 files changed, 66 insertions(+), 80 deletions(-) diff --git a/block.c b/block.c index 618b8c3..b28fb93 100644 --- a/block.c +++ b/block.c @@ -2190,114 +2190,99 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, return overlay; } -typedef struct BlkIntermediateStates { -BlockDriverState *bs; -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; -} BlkIntermediateStates; - - /* - * Drops images above 'base' up to and including 'top', and sets the image - * above 'top' to have base as its backing file. + * Drops images above 'base' up to and including 'top', and sets new 'base' + * as backing_hd of top_overlay (the image orignally has 'top' as backing + * file). top_overlay may be NULL if 'top' is active, no such update needed. + * Requires that the top_overlay to 'top' is opened r/w. * - * Requires that the overlay to 'top' is opened r/w, so that the backing file - * information in 'bs' can be properly updated. + * 1) This will convert the following chain: * - * E.g., this will convert the following chain: - * bottom <- base <- intermediate <- top <- active + * ... <- base <- ... <- top <- overlay <-... <- active * * to * - * bottom <- base <- active + * ... <- base <- overlay <- active * - * It is allowed for bottom==base, in which case it converts: + * 2) It is allowed for bottom==base, in which case it converts: * - * base <- intermediate <- top <- active + * base <- ... <- top <- overlay <- ... <- active * * to * - * base <- active + * base <- overlay <- active + * + * 2) It also allows active==top, in which case it converts: + * + * ... <- base <- ... <- top (active) + * + * to + * + * ... <- base == active == top + * + * i.e. only base and lower remains: *top == *base when return. + * + * 3) If base==NULL, it will drop all the BDS below overlay and set its + * backing_hd to NULL. I.e.: + * + * base(NULL) <- ... <- overlay <- ... <- active + * + * to * - * Error conditions: - * if active == top, that is considered an error + * overlay <- ... <- active * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base) { -BlockDriverState *intermediate; -BlockDriverState *base_bs = NULL; -BlockDriverState *new_top_bs = NULL; -BlkIntermediateStates *intermediate_state, *next; -int ret = -EIO; - -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; -QSIMPLEQ_INIT(&states_to_delete); - -if (!top->drv || !base->drv) { -goto exit; -} - -new_top_bs = bdrv_find_overlay(active, top); +BlockDriverState *drop_start, *overlay; +int ret = -EINVAL; -if (new_top_bs == NULL) { -/* we could not find the image above 'top', this is an error */ +if (!top->drv || (base && !base->drv)) { goto exit; } - -/* special case of new_top_bs->backing_hd already pointing to base - nothing - * to do, no intermediate images */ -if (new_top_bs->backing_hd == base) { +if (top == base) { ret = 0; -goto exit; -} - -intermediate = top; - -/* now we will go down through the list, and add each BDS we find - * into our deletion queue, until we hit the 'base' - */ -while (intermediate) { -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); -intermediate_state->bs = intermediate; -QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); - -if (intermediate->backing_hd == base) { -base_bs = intermediate->backing_hd; -break; +} else if (top == active) { +assert(base); +drop_start = active->backing_hd; +bdrv_swap(active, base); +base->backing_hd = NULL; +bdrv_unref(drop_start); +ret = 0; +} else { +/* If there's an overlay, its backing_hd points to top's BDS now, + * the top image is dropped but this BDS structure is kept and swapped + * with base, this way we keep the pointers valid after dropping top */ +overlay = bdrv_find_overlay(active, top); +if (!overlay) { +goto exit; +} +if (base) { +ret = bdrv_change_backing_file(overlay, base->filename, + base->drv->format_
[Qemu-devel] [PATCH v9 09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images
This reuses the new bdrv_drop_intermediate. Signed-off-by: Fam Zheng --- block/stream.c | 28 +--- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/block/stream.c b/block/stream.c index 46bec7d..9cdcf0e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -51,32 +51,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } -static void close_unused_images(BlockDriverState *top, BlockDriverState *base, -const char *base_id) -{ -BlockDriverState *intermediate; -intermediate = top->backing_hd; - -/* Must assign before bdrv_delete() to prevent traversing dangling pointer - * while we delete backing image instances. - */ -top->backing_hd = base; - -while (intermediate) { -BlockDriverState *unused; - -/* reached base */ -if (intermediate == base) { -break; -} - -unused = intermediate; -intermediate = intermediate->backing_hd; -unused->backing_hd = NULL; -bdrv_unref(unused); -} -} - static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; @@ -190,7 +164,7 @@ wait: } } ret = bdrv_change_backing_file(bs, base_id, base_fmt); -close_unused_images(bs, base, base_id); +bdrv_drop_intermediate(bs, bs->backing_hd, base); } qemu_vfree(buf); -- 1.8.5.1
[Qemu-devel] [PATCH v9 10/11] qmp: Add command 'blockdev-backup'
Similar to drive-backup, but this command uses a device id as target instead of creating/opening an image file. Also add blocker on target bs, since the target is also a named device now. Signed-off-by: Fam Zheng --- block/backup.c | 21 + blockdev.c | 47 +++ qapi-schema.json | 49 + qmp-commands.hx | 44 4 files changed, 161 insertions(+) diff --git a/block/backup.c b/block/backup.c index 0198514..c8fe1a9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -339,6 +339,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); +bdrv_op_unblock_all(target, job->common.blocker); bdrv_unref(target); block_job_completed(&job->common, ret); @@ -364,6 +365,24 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +if (!bdrv_is_inserted(bs)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bs->device_name); +return; +} + +if (!bdrv_is_inserted(target)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, target->device_name); +return; +} + +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +return; +} + +if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { +return; +} + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -377,6 +396,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +bdrv_op_block_all(target, job->common.blocker); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; diff --git a/blockdev.c b/blockdev.c index 7f305d8..5627e5d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1872,6 +1872,8 @@ void qmp_drive_backup(const char *device, const char *target, return; } +/* Although backup_run has this check too, we need to use bs->drv below, so + * do an early check redundantly. */ if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); return; @@ -1888,6 +1890,7 @@ void qmp_drive_backup(const char *device, const char *target, } } +/* Early check to avoid creating target */ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { return; } @@ -1946,6 +1949,50 @@ void qmp_drive_backup(const char *device, const char *target, } } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ +BlockDriverState *bs; +BlockDriverState *target_bs; +Error *local_err = NULL; + +if (!has_speed) { +speed = 0; +} +if (!has_on_source_error) { +on_source_error = BLOCKDEV_ON_ERROR_REPORT; +} +if (!has_on_target_error) { +on_target_error = BLOCKDEV_ON_ERROR_REPORT; +} + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +target_bs = bdrv_find(target); +if (!target_bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, target); +return; +} + +bdrv_ref(target_bs); +backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); +if (local_err != NULL) { +bdrv_unref(target_bs); +error_propagate(errp, local_err); +} +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, diff --git a/qapi-schema.json b/qapi-schema.json index 288d024..e7c37c8 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1869,6 +1869,40 @@ '*on-target-error': 'BlockdevOnError' } } ## +# @BlockdevBackup +# +# @device: the name of the device which should be copied. +# +# @target: the name of the backup target device. +# +# @sync: what parts of the disk image should be copied to the destination +#(all the disk, only the sectors allocated in the topmost image, or +#only new I/O). +# +# @speed: #optional the maximum speed, in bytes per second. +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block devi
[Qemu-devel] [PATCH v9 11/11] block: Allow backup on referenced named BlockDriverState
Drive backup is a read only operation on source bs. We want to allow this specific case to enable image-fleecing. Note that when image-fleecing job starts, the job still add its blocker to source bs, and any other operation on it will be blocked by that. Signed-off-by: Fam Zheng --- block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block.c b/block.c index b28fb93..1bc00ef 100644 --- a/block.c +++ b/block.c @@ -988,6 +988,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) /* Otherwise we won't be able to commit due to check in bdrv_commit */ bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, bs->backing_blocker); +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), -- 1.8.5.1
[Qemu-devel] [PATCH v9 04/11] block: Move op_blocker check from block_job_create to its caller
It makes no sense to check for "any" blocker on bs, we are here only because of the mechanical conversion from in_use to op_blockers. Remove it now, and let the callers check specific operation types. Backup and mirror already have it, add checker to stream and commit. Signed-off-by: Fam Zheng --- blockdev.c | 8 blockjob.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 9d36775..7f305d8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1754,6 +1754,10 @@ void qmp_block_stream(const char *device, bool has_base, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { +return; +} + if (base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { @@ -1794,6 +1798,10 @@ void qmp_block_commit(const char *device, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { +return; +} + /* default top_bs is the active layer */ top_bs = bs; diff --git a/blockjob.c b/blockjob.c index f1ff036..21e21c0 100644 --- a/blockjob.c +++ b/blockjob.c @@ -41,7 +41,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, { BlockJob *job; -if (bs->job || !bdrv_op_blocker_is_empty(bs)) { +if (bs->job) { error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } -- 1.8.5.1
[Qemu-devel] [PATCH v9 07/11] block: Parse "backing" option to reference existing BDS
Now it's safe to allow reference for backing_hd in the interface. Signed-off-by: Fam Zheng --- block.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 907c483..618b8c3 100644 --- a/block.c +++ b/block.c @@ -1199,12 +1199,34 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* If there is a backing file, use it */ if ((flags & BDRV_O_NO_BACKING) == 0) { QDict *backing_options; +const char *backing_name; +BlockDriverState *backing_hd; +backing_name = qdict_get_try_str(options, "backing"); qdict_extract_subqdict(options, &backing_options, "backing."); -ret = bdrv_open_backing_file(bs, backing_options, &local_err); -if (ret < 0) { + +if (backing_name && qdict_size(backing_options)) { +error_setg(&local_err, + "Option \"backing\" and \"backing.*\" cannot be " + "used together"); +ret = -EINVAL; goto close_and_fail; } +if (backing_name) { +backing_hd = bdrv_find(backing_name); +if (!backing_hd) { +error_set(&local_err, QERR_DEVICE_NOT_FOUND, backing_name); +ret = -ENOENT; +goto close_and_fail; +} +qdict_del(options, "backing"); +bdrv_set_backing_hd(bs, backing_hd); +} else { +ret = bdrv_open_backing_file(bs, backing_options, &local_err); +if (ret < 0) { +goto close_and_fail; +} +} } /* Check if any unknown options were used */ -- 1.8.5.1
Re: [Qemu-devel] [PATCH v8 02/12] qapi: Add BlockOperationType enum
On 2014年01月08日 11:26, Stefan Hajnoczi wrote: On Wed, Jan 08, 2014 at 10:28:59AM +0800, Fam Zheng wrote: On 2014年01月03日 18:09, Stefan Hajnoczi wrote: On Fri, Dec 13, 2013 at 03:35:10PM +0800, Fam Zheng wrote: This adds the enum of all the operations that can be taken on a block device. Signed-off-by: Fam Zheng --- qapi-schema.json | 50 ++ 1 file changed, 50 insertions(+) diff --git a/qapi-schema.json b/qapi-schema.json index d6f8615..8e982a2 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1440,6 +1440,56 @@ 'data': ['commit', 'stream', 'mirror', 'backup'] } ## +# @BlockOperationType +# +# Type of a block operation. (since 2.0) Why is this exposed in qapi-schema.json? The blockers concept is internal to QEMU and not exposed via QMP. I plan to add it into block information (query-block, for example). But in follow up patches. It could be useful for user to check which commands/operations are possible without trial-and-fail, when we put more and more complicated state and configuration into the BDS graph. Can you make it internal for now and expose via QMP if necessary later? I think we shouldn't commit to public APIs unless they are ready and provide a useful service, since we cannot change them. OK, works for me. Fam
[Qemu-devel] [PATCH v10 00/11] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD
This series adds for point-in-time snapshot NBD exporting based on blockdev-backup (variant of drive-backup with existing device as target). We get a thin point-in-time snapshot by COW mechanism of drive-backup, and export it through built in NBD server. The steps are as below: 1. (SHELL) qemu-img create -f qcow2 BACKUP.qcow2 (Alternatively we can use -o backing_file=RUNNING-VM.img to omit explicitly providing the size by ourselves, but it's risky because RUNNING-VM.qcow2 is used r/w by guest. Whether or not setting backing file in the image file doesn't matter, as we are going to override the backing hd in the next step) 2. (QMP) blockdev-add backing=source-drive file.driver=file file.filename=BACKUP.qcow2 id=target0 if=none driver=qcow2 (where source-drive is the running BlockDriverState name for RUNNING-VM.img. This patch implements "backing=" option to override backing_hd for added drive) 3. (QMP) blockdev-backup device=source-drive sync=none target=target0 (this is the QMP command introduced by this series, which use a named device as target of drive-backup) 4. (QMP) nbd-server-add device=target0 When image fleecing done: 1. (QMP) block-job-cancel device=source-drive 2. (HMP) drive_del target0 3. (SHELL) rm BACKUP.qcow2 v8 -> v10: Rebased to qemu.git. Address Stefan's comments: [01/11] block: Add BlockOpType enum Change enum definition as internal. [05/11] block: Add bdrv_set_backing_hd() Set bs->backing_file and bs->backing_format. [06/11] block: Add backing_blocker in BlockDriverState Reuse bdrv_set_backing_hd(). [07/11] block: Parse "backing" option to reference existing BDS Fix use-after-free. Check for "backing=" and "backing.file=" conflict. Remove unintended bdrv_swap hunks. [08/11] block: Support dropping active in bdrv_drop_intermediate Fix function comment. [09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images Fam Zheng (11): block: Add BlockOpType enum block: Introduce op_blockers to BlockDriverState block: Replace in_use with operation blocker block: Move op_blocker check from block_job_create to its caller block: Add bdrv_set_backing_hd() block: Add backing_blocker in BlockDriverState block: Parse "backing" option to reference existing BDS block: Support dropping active in bdrv_drop_intermediate stream: Use bdrv_drop_intermediate and drop close_unused_images qmp: Add command 'blockdev-backup' block: Allow backup on referenced named BlockDriverState block-migration.c | 7 +- block.c | 306 +++- block/backup.c | 21 +++ block/commit.c | 1 + block/stream.c | 28 +--- blockdev.c | 70 +++-- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 ++- include/block/block.h | 29 +++- include/block/block_int.h | 9 +- include/block/blockjob.h| 3 + qapi-schema.json| 49 +++ qmp-commands.hx | 44 ++ 13 files changed, 446 insertions(+), 154 deletions(-) -- 1.8.5.1
[Qemu-devel] [PATCH v10 01/11] block: Add BlockOpType enum
This adds the enum of all the operations that can be taken on a block device. Signed-off-by: Fam Zheng --- include/block/block.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index 36efaea..2bc39fe 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -151,6 +151,25 @@ typedef struct BDRVReopenState { void *opaque; } BDRVReopenState; +/* + * Block operation types + */ +typedef enum BlockOpType { +BLOCK_OP_TYPE_BACKUP_SOURCE, +BLOCK_OP_TYPE_BACKUP_TARGET, +BLOCK_OP_TYPE_CHANGE, +BLOCK_OP_TYPE_COMMIT, +BLOCK_OP_TYPE_DATAPLANE, +BLOCK_OP_TYPE_DRIVE_DEL, +BLOCK_OP_TYPE_EJECT, +BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, +BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, +BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, +BLOCK_OP_TYPE_MIRROR, +BLOCK_OP_TYPE_RESIZE, +BLOCK_OP_TYPE_STREAM, +BLOCK_OP_TYPE_MAX, +} BlockOpType; void bdrv_iostatus_enable(BlockDriverState *bs); void bdrv_iostatus_reset(BlockDriverState *bs); -- 1.8.5.1
[Qemu-devel] [PATCH v10 03/11] block: Replace in_use with operation blocker
This drops BlockDriverState.in_use with op_blockers: - Call bdrv_op_block_all in place of bdrv_set_in_use(bs, 1). - Call bdrv_op_unblock_all in place of bdrv_set_in_use(bs, 0). - Check bdrv_op_is_blocked() in place of bdrv_in_use(bs). The specific types are used, e.g. in place of starting block backup, bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP, ...). - Check bdrv_op_blocker_is_empty() in place of assert(!bs->in_use). Note: there is only bdrv_op_block_all and bdrv_op_unblock_all callers at this moment. So although the checks are specific to op types, this changes can still be seen as identical logic with previously with in_use. The difference is error message are improved because of blocker error info. Signed-off-by: Fam Zheng --- block-migration.c | 7 +-- block.c | 24 +++- blockdev.c | 15 ++- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 --- include/block/block.h | 2 -- include/block/block_int.h | 1 - include/block/blockjob.h| 3 +++ 8 files changed, 42 insertions(+), 43 deletions(-) diff --git a/block-migration.c b/block-migration.c index 897fdba..bf9a25f 100644 --- a/block-migration.c +++ b/block-migration.c @@ -59,6 +59,7 @@ typedef struct BlkMigDevState { unsigned long *aio_bitmap; int64_t completed_sectors; BdrvDirtyBitmap *dirty_bitmap; +Error *blocker; } BlkMigDevState; typedef struct BlkMigBlock { @@ -346,7 +347,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) bmds->completed_sectors = 0; bmds->shared_base = block_mig_state.shared_base; alloc_aio_bitmap(bmds); -bdrv_set_in_use(bs, 1); +error_setg(&bmds->blocker, "block device is in use by migration"); +bdrv_op_block_all(bs, bmds->blocker); bdrv_ref(bs); block_mig_state.total_sector_sum += sectors; @@ -584,7 +586,8 @@ static void blk_mig_cleanup(void) blk_mig_lock(); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); -bdrv_set_in_use(bmds->bs, 0); +bdrv_op_unblock_all(bmds->bs, bmds->blocker); +error_free(bmds->blocker); bdrv_unref(bmds->bs); g_free(bmds->aio_bitmap); g_free(bmds); diff --git a/block.c b/block.c index 91cda9c..b122154 100644 --- a/block.c +++ b/block.c @@ -1621,7 +1621,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->refcnt = bs_src->refcnt; /* job */ -bs_dest->in_use = bs_src->in_use; bs_dest->job= bs_src->job; /* keep the same entry in bdrv_states */ @@ -1653,7 +1652,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1672,7 +1671,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1709,7 +1708,7 @@ static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); -assert(!bs->in_use); +assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -1891,7 +1890,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } -if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || +bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { return -EBUSY; } @@ -2924,8 +2924,9 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; -if (bdrv_in_use(bs)) +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { return -EBUSY; +} ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -4705,17 +4706,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } -void bdrv_set_in_use(BlockDriverState *bs, int in_use) -{ -assert(bs->in_use
[Qemu-devel] [PATCH v10 02/11] block: Introduce op_blockers to BlockDriverState
BlockDriverState.op_blockers is an array of lists with BLOCK_OP_TYPE_MAX elements. Each list is a list of blockers of an operation type (BlockOpType), that marks this BDS as currently blocked for a certain type of operation with reason errors stored in the list. The rule of usage is: * BDS user who wants to take an operation should check if there's any blocker of the type with bdrv_op_is_blocked(). * BDS user who wants to block certain types of operation, should call bdrv_op_block (or bdrv_op_block_all to block all types of operations, which is similar to the existing bdrv_set_in_use()). * A blocker is only referenced by op_blockers, so the lifecycle is managed by caller, and shouldn't be lost until unblock, so typically a caller does these: - Allocate a blocker with error_setg or similar, call bdrv_op_block() to block some operations. - Hold the blocker, do his job. - Unblock operations that it blocked, with the same reason pointer passed to bdrv_op_unblock(). - Release the blocker with error_free(). Signed-off-by: Fam Zheng --- block.c | 71 +++ include/block/block.h | 7 + include/block/block_int.h | 5 3 files changed, 83 insertions(+) diff --git a/block.c b/block.c index 64e7d22..91cda9c 100644 --- a/block.c +++ b/block.c @@ -1627,6 +1627,8 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* keep the same entry in bdrv_states */ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name), bs_src->device_name); +memcpy(bs_dest->op_blockers, bs_src->op_blockers, + sizeof(bs_dest->op_blockers)); bs_dest->list = bs_src->list; } @@ -4634,6 +4636,75 @@ void bdrv_unref(BlockDriverState *bs) } } +struct BdrvOpBlocker { +Error *reason; +QLIST_ENTRY(BdrvOpBlocker) list; +}; + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) +{ +BdrvOpBlocker *blocker; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); +if (!QLIST_EMPTY(&bs->op_blockers[op])) { +blocker = QLIST_FIRST(&bs->op_blockers[op]); +if (errp) { +*errp = error_copy(blocker->reason); +} +return true; +} +return false; +} + +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) +{ +BdrvOpBlocker *blocker; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + +blocker = g_malloc0(sizeof(BdrvOpBlocker)); +blocker->reason = reason; +QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); +} + +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) +{ +BdrvOpBlocker *blocker, *next; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); +QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { +if (blocker->reason == reason) { +QLIST_REMOVE(blocker, list); +g_free(blocker); +} +} +} + +void bdrv_op_block_all(BlockDriverState *bs, Error *reason) +{ +int i; +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +bdrv_op_block(bs, i, reason); +} +} + +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) +{ +int i; +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +bdrv_op_unblock(bs, i, reason); +} +} + +bool bdrv_op_blocker_is_empty(BlockDriverState *bs) +{ +int i; + +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +if (!QLIST_EMPTY(&bs->op_blockers[i])) { +return false; +} +} +return true; +} + void bdrv_set_in_use(BlockDriverState *bs, int in_use) { assert(bs->in_use != in_use); diff --git a/include/block/block.h b/include/block/block.h index 2bc39fe..e6073e7 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -461,6 +461,13 @@ void bdrv_unref(BlockDriverState *bs); void bdrv_set_in_use(BlockDriverState *bs, int in_use); int bdrv_in_use(BlockDriverState *bs); +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + #ifdef CONFIG_LINUX_AIO int raw_get_aio_fd(BlockDriverState *bs); #else diff --git a/include/block/block_int.h b/include/block/block_int.h index 8b132d7..458acd6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -252,6 +252,8 @@ typedef struct BlockLimits { int opt_transfer_length; } BlockLimits; +typedef struct BdrvOpBlocker BdrvOpBlocker; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates,
[Qemu-devel] [PATCH v10 04/11] block: Move op_blocker check from block_job_create to its caller
It makes no sense to check for "any" blocker on bs, we are here only because of the mechanical conversion from in_use to op_blockers. Remove it now, and let the callers check specific operation types. Backup and mirror already have it, add checker to stream and commit. Signed-off-by: Fam Zheng --- blockdev.c | 8 blockjob.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 9d36775..7f305d8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1754,6 +1754,10 @@ void qmp_block_stream(const char *device, bool has_base, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { +return; +} + if (base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { @@ -1794,6 +1798,10 @@ void qmp_block_commit(const char *device, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { +return; +} + /* default top_bs is the active layer */ top_bs = bs; diff --git a/blockjob.c b/blockjob.c index f1ff036..21e21c0 100644 --- a/blockjob.c +++ b/blockjob.c @@ -41,7 +41,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, { BlockJob *job; -if (bs->job || !bdrv_op_blocker_is_empty(bs)) { +if (bs->job) { error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } -- 1.8.5.1
[Qemu-devel] [PATCH v10 07/11] block: Parse "backing" option to reference existing BDS
Now it's safe to allow reference for backing_hd in the interface. Signed-off-by: Fam Zheng --- block.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 907c483..618b8c3 100644 --- a/block.c +++ b/block.c @@ -1199,12 +1199,34 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* If there is a backing file, use it */ if ((flags & BDRV_O_NO_BACKING) == 0) { QDict *backing_options; +const char *backing_name; +BlockDriverState *backing_hd; +backing_name = qdict_get_try_str(options, "backing"); qdict_extract_subqdict(options, &backing_options, "backing."); -ret = bdrv_open_backing_file(bs, backing_options, &local_err); -if (ret < 0) { + +if (backing_name && qdict_size(backing_options)) { +error_setg(&local_err, + "Option \"backing\" and \"backing.*\" cannot be " + "used together"); +ret = -EINVAL; goto close_and_fail; } +if (backing_name) { +backing_hd = bdrv_find(backing_name); +if (!backing_hd) { +error_set(&local_err, QERR_DEVICE_NOT_FOUND, backing_name); +ret = -ENOENT; +goto close_and_fail; +} +qdict_del(options, "backing"); +bdrv_set_backing_hd(bs, backing_hd); +} else { +ret = bdrv_open_backing_file(bs, backing_options, &local_err); +if (ret < 0) { +goto close_and_fail; +} +} } /* Check if any unknown options were used */ -- 1.8.5.1
[Qemu-devel] [PATCH v9 03/11] block: Replace in_use with operation blocker
This drops BlockDriverState.in_use with op_blockers: - Call bdrv_op_block_all in place of bdrv_set_in_use(bs, 1). - Call bdrv_op_unblock_all in place of bdrv_set_in_use(bs, 0). - Check bdrv_op_is_blocked() in place of bdrv_in_use(bs). The specific types are used, e.g. in place of starting block backup, bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP, ...). - Check bdrv_op_blocker_is_empty() in place of assert(!bs->in_use). Note: there is only bdrv_op_block_all and bdrv_op_unblock_all callers at this moment. So although the checks are specific to op types, this changes can still be seen as identical logic with previously with in_use. The difference is error message are improved because of blocker error info. Signed-off-by: Fam Zheng --- block-migration.c | 7 +-- block.c | 24 +++- blockdev.c | 15 ++- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 --- include/block/block.h | 2 -- include/block/block_int.h | 1 - include/block/blockjob.h| 3 +++ 8 files changed, 42 insertions(+), 43 deletions(-) diff --git a/block-migration.c b/block-migration.c index 897fdba..bf9a25f 100644 --- a/block-migration.c +++ b/block-migration.c @@ -59,6 +59,7 @@ typedef struct BlkMigDevState { unsigned long *aio_bitmap; int64_t completed_sectors; BdrvDirtyBitmap *dirty_bitmap; +Error *blocker; } BlkMigDevState; typedef struct BlkMigBlock { @@ -346,7 +347,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) bmds->completed_sectors = 0; bmds->shared_base = block_mig_state.shared_base; alloc_aio_bitmap(bmds); -bdrv_set_in_use(bs, 1); +error_setg(&bmds->blocker, "block device is in use by migration"); +bdrv_op_block_all(bs, bmds->blocker); bdrv_ref(bs); block_mig_state.total_sector_sum += sectors; @@ -584,7 +586,8 @@ static void blk_mig_cleanup(void) blk_mig_lock(); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); -bdrv_set_in_use(bmds->bs, 0); +bdrv_op_unblock_all(bmds->bs, bmds->blocker); +error_free(bmds->blocker); bdrv_unref(bmds->bs); g_free(bmds->aio_bitmap); g_free(bmds); diff --git a/block.c b/block.c index 91cda9c..b122154 100644 --- a/block.c +++ b/block.c @@ -1621,7 +1621,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->refcnt = bs_src->refcnt; /* job */ -bs_dest->in_use = bs_src->in_use; bs_dest->job= bs_src->job; /* keep the same entry in bdrv_states */ @@ -1653,7 +1652,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1672,7 +1671,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1709,7 +1708,7 @@ static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); -assert(!bs->in_use); +assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -1891,7 +1890,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } -if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || +bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { return -EBUSY; } @@ -2924,8 +2924,9 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; -if (bdrv_in_use(bs)) +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { return -EBUSY; +} ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -4705,17 +4706,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } -void bdrv_set_in_use(BlockDriverState *bs, int in_use) -{ -assert(bs->in_use
[Qemu-devel] [PATCH v10 09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images
This reuses the new bdrv_drop_intermediate. Signed-off-by: Fam Zheng --- block/stream.c | 28 +--- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/block/stream.c b/block/stream.c index 46bec7d..9cdcf0e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -51,32 +51,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } -static void close_unused_images(BlockDriverState *top, BlockDriverState *base, -const char *base_id) -{ -BlockDriverState *intermediate; -intermediate = top->backing_hd; - -/* Must assign before bdrv_delete() to prevent traversing dangling pointer - * while we delete backing image instances. - */ -top->backing_hd = base; - -while (intermediate) { -BlockDriverState *unused; - -/* reached base */ -if (intermediate == base) { -break; -} - -unused = intermediate; -intermediate = intermediate->backing_hd; -unused->backing_hd = NULL; -bdrv_unref(unused); -} -} - static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; @@ -190,7 +164,7 @@ wait: } } ret = bdrv_change_backing_file(bs, base_id, base_fmt); -close_unused_images(bs, base, base_id); +bdrv_drop_intermediate(bs, bs->backing_hd, base); } qemu_vfree(buf); -- 1.8.5.1
[Qemu-devel] [PATCH v10 11/11] block: Allow backup on referenced named BlockDriverState
Drive backup is a read only operation on source bs. We want to allow this specific case to enable image-fleecing. Note that when image-fleecing job starts, the job still add its blocker to source bs, and any other operation on it will be blocked by that. Signed-off-by: Fam Zheng --- block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block.c b/block.c index b28fb93..1bc00ef 100644 --- a/block.c +++ b/block.c @@ -988,6 +988,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) /* Otherwise we won't be able to commit due to check in bdrv_commit */ bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, bs->backing_blocker); +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), -- 1.8.5.1
[Qemu-devel] [PATCH v10 06/11] block: Add backing_blocker in BlockDriverState
This makes use of op_blocker and blocks all the operations except for commit target, on each BlockDriverState->backing_hd. Signed-off-by: Fam Zheng --- block.c | 16 ++-- include/block/block_int.h | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index ff25749..907c483 100644 --- a/block.c +++ b/block.c @@ -961,13 +961,22 @@ fail: void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { if (bs->backing_hd) { +assert(error_is_set(&bs->backing_blocker)); +bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); bdrv_unref(bs->backing_hd); +} else if (backing_hd) { +error_setg(&bs->backing_blocker, + "device is used as backing hd of '%s'", + bs->device_name); } bs->backing_hd = backing_hd; if (!backing_hd) { bs->backing_file[0] = '\0'; bs->backing_format[0] = '\0'; +if (error_is_set(&bs->backing_blocker)) { +error_free(bs->backing_blocker); +} return; } pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); @@ -975,6 +984,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) backing_hd->drv ? backing_hd->drv->format_name : ""); bdrv_ref(bs->backing_hd); +bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); +/* Otherwise we won't be able to commit due to check in bdrv_commit */ +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1481,8 +1494,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_set_backing_hd(bs, NULL); } bs->drv->bdrv_close(bs); g_free(bs->opaque); diff --git a/include/block/block_int.h b/include/block/block_int.h index 2f6556d..1ac17d5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -341,6 +341,9 @@ struct BlockDriverState { BlockJob *job; QDict *options; + +/* The error object in use for blocking operations on backing_hd */ +Error *backing_blocker; }; int get_tmp_filename(char *filename, int size); -- 1.8.5.1
[Qemu-devel] [PATCH v10 08/11] block: Support dropping active in bdrv_drop_intermediate
Dropping intermediate could be useful both for commit and stream, and BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs to work with op blockers. Signed-off-by: Fam Zheng --- block.c| 145 ++--- block/commit.c | 1 + 2 files changed, 66 insertions(+), 80 deletions(-) diff --git a/block.c b/block.c index 618b8c3..b28fb93 100644 --- a/block.c +++ b/block.c @@ -2190,114 +2190,99 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, return overlay; } -typedef struct BlkIntermediateStates { -BlockDriverState *bs; -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; -} BlkIntermediateStates; - - /* - * Drops images above 'base' up to and including 'top', and sets the image - * above 'top' to have base as its backing file. + * Drops images above 'base' up to and including 'top', and sets new 'base' + * as backing_hd of top_overlay (the image orignally has 'top' as backing + * file). top_overlay may be NULL if 'top' is active, no such update needed. + * Requires that the top_overlay to 'top' is opened r/w. * - * Requires that the overlay to 'top' is opened r/w, so that the backing file - * information in 'bs' can be properly updated. + * 1) This will convert the following chain: * - * E.g., this will convert the following chain: - * bottom <- base <- intermediate <- top <- active + * ... <- base <- ... <- top <- overlay <-... <- active * * to * - * bottom <- base <- active + * ... <- base <- overlay <- active * - * It is allowed for bottom==base, in which case it converts: + * 2) It is allowed for bottom==base, in which case it converts: * - * base <- intermediate <- top <- active + * base <- ... <- top <- overlay <- ... <- active * * to * - * base <- active + * base <- overlay <- active + * + * 2) It also allows active==top, in which case it converts: + * + * ... <- base <- ... <- top (active) + * + * to + * + * ... <- base == active == top + * + * i.e. only base and lower remains: *top == *base when return. + * + * 3) If base==NULL, it will drop all the BDS below overlay and set its + * backing_hd to NULL. I.e.: + * + * base(NULL) <- ... <- overlay <- ... <- active + * + * to * - * Error conditions: - * if active == top, that is considered an error + * overlay <- ... <- active * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base) { -BlockDriverState *intermediate; -BlockDriverState *base_bs = NULL; -BlockDriverState *new_top_bs = NULL; -BlkIntermediateStates *intermediate_state, *next; -int ret = -EIO; - -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; -QSIMPLEQ_INIT(&states_to_delete); - -if (!top->drv || !base->drv) { -goto exit; -} - -new_top_bs = bdrv_find_overlay(active, top); +BlockDriverState *drop_start, *overlay; +int ret = -EINVAL; -if (new_top_bs == NULL) { -/* we could not find the image above 'top', this is an error */ +if (!top->drv || (base && !base->drv)) { goto exit; } - -/* special case of new_top_bs->backing_hd already pointing to base - nothing - * to do, no intermediate images */ -if (new_top_bs->backing_hd == base) { +if (top == base) { ret = 0; -goto exit; -} - -intermediate = top; - -/* now we will go down through the list, and add each BDS we find - * into our deletion queue, until we hit the 'base' - */ -while (intermediate) { -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); -intermediate_state->bs = intermediate; -QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); - -if (intermediate->backing_hd == base) { -base_bs = intermediate->backing_hd; -break; +} else if (top == active) { +assert(base); +drop_start = active->backing_hd; +bdrv_swap(active, base); +base->backing_hd = NULL; +bdrv_unref(drop_start); +ret = 0; +} else { +/* If there's an overlay, its backing_hd points to top's BDS now, + * the top image is dropped but this BDS structure is kept and swapped + * with base, this way we keep the pointers valid after dropping top */ +overlay = bdrv_find_overlay(active, top); +if (!overlay) { +goto exit; +} +if (base) { +ret = bdrv_change_backing_file(overlay, base->filename, + base->drv->format_
[Qemu-devel] [PATCH v10 05/11] block: Add bdrv_set_backing_hd()
This is the common but non-trivial steps to assign or change the backing_hd of BDS. Signed-off-by: Fam Zheng --- block.c | 34 -- include/block/block.h | 1 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index b122154..ff25749 100644 --- a/block.c +++ b/block.c @@ -958,6 +958,29 @@ fail: return ret; } +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +{ +if (bs->backing_hd) { +bdrv_unref(bs->backing_hd); +} + +bs->backing_hd = backing_hd; +if (!backing_hd) { +bs->backing_file[0] = '\0'; +bs->backing_format[0] = '\0'; +return; +} +pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_file), +backing_hd->drv ? backing_hd->drv->format_name : ""); +bdrv_ref(bs->backing_hd); + +pstrcpy(bs->backing_file, sizeof(bs->backing_file), +bs->backing_hd->file->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_format), +bs->backing_hd->drv->format_name); +} + /* * Opens the backing file for a BlockDriverState if not yet open * @@ -971,6 +994,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) char backing_filename[PATH_MAX]; int back_flags, ret; BlockDriver *back_drv = NULL; +BlockDriverState *backing_hd; Error *local_err = NULL; if (bs->backing_hd != NULL) { @@ -994,7 +1018,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) sizeof(backing_filename)); } -bs->backing_hd = bdrv_new(""); +backing_hd = bdrv_new(""); if (bs->backing_format[0] != '\0') { back_drv = bdrv_find_format(bs->backing_format); @@ -1004,20 +1028,18 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ); -ret = bdrv_open(bs->backing_hd, +ret = bdrv_open(backing_hd, *backing_filename ? backing_filename : NULL, options, back_flags, back_drv, &local_err); if (ret < 0) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_unref(backing_hd); bs->open_flags |= BDRV_O_NO_BACKING; error_setg(errp, "Could not open backing file: %s", error_get_pretty(local_err)); error_free(local_err); return ret; } -pstrcpy(bs->backing_file, sizeof(bs->backing_file), -bs->backing_hd->file->filename); +bdrv_set_backing_hd(bs, backing_hd); return 0; } diff --git a/include/block/block.h b/include/block/block.h index 5339f92..4ec0069 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -204,6 +204,7 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, QDict *options, int flags, Error **errp); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, int flags, BlockDriver *drv, Error **errp); -- 1.8.5.1
[Qemu-devel] [PATCH v10 10/11] qmp: Add command 'blockdev-backup'
Similar to drive-backup, but this command uses a device id as target instead of creating/opening an image file. Also add blocker on target bs, since the target is also a named device now. Signed-off-by: Fam Zheng --- block/backup.c | 21 + blockdev.c | 47 +++ qapi-schema.json | 49 + qmp-commands.hx | 44 4 files changed, 161 insertions(+) diff --git a/block/backup.c b/block/backup.c index 0198514..c8fe1a9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -339,6 +339,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); +bdrv_op_unblock_all(target, job->common.blocker); bdrv_unref(target); block_job_completed(&job->common, ret); @@ -364,6 +365,24 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +if (!bdrv_is_inserted(bs)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bs->device_name); +return; +} + +if (!bdrv_is_inserted(target)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, target->device_name); +return; +} + +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +return; +} + +if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { +return; +} + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -377,6 +396,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +bdrv_op_block_all(target, job->common.blocker); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; diff --git a/blockdev.c b/blockdev.c index 7f305d8..5627e5d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1872,6 +1872,8 @@ void qmp_drive_backup(const char *device, const char *target, return; } +/* Although backup_run has this check too, we need to use bs->drv below, so + * do an early check redundantly. */ if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); return; @@ -1888,6 +1890,7 @@ void qmp_drive_backup(const char *device, const char *target, } } +/* Early check to avoid creating target */ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { return; } @@ -1946,6 +1949,50 @@ void qmp_drive_backup(const char *device, const char *target, } } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ +BlockDriverState *bs; +BlockDriverState *target_bs; +Error *local_err = NULL; + +if (!has_speed) { +speed = 0; +} +if (!has_on_source_error) { +on_source_error = BLOCKDEV_ON_ERROR_REPORT; +} +if (!has_on_target_error) { +on_target_error = BLOCKDEV_ON_ERROR_REPORT; +} + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +target_bs = bdrv_find(target); +if (!target_bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, target); +return; +} + +bdrv_ref(target_bs); +backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); +if (local_err != NULL) { +bdrv_unref(target_bs); +error_propagate(errp, local_err); +} +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, diff --git a/qapi-schema.json b/qapi-schema.json index c3c939c..2b5b9af 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1819,6 +1819,40 @@ '*on-target-error': 'BlockdevOnError' } } ## +# @BlockdevBackup +# +# @device: the name of the device which should be copied. +# +# @target: the name of the backup target device. +# +# @sync: what parts of the disk image should be copied to the destination +#(all the disk, only the sectors allocated in the topmost image, or +#only new I/O). +# +# @speed: #optional the maximum speed, in bytes per second. +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block devi
Re: [Qemu-devel] [PATCH v8 08/12] block: Parse "backing" option to reference existing BDS
On 2014年01月03日 17:19, Stefan Hajnoczi wrote: } @@ -1682,7 +1696,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); -assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1701,7 +1714,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); -assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); Why are these hunks part of this patch? I guess it makes sense *not* to check for blockers in bdrv_swap(). Instead the high-level functions in blockdev.c and elsewhere should check blockers. The two checks are here because of the mechanical replace of in_use. They are removed because it is no longer true for some valid cases, e.g with "block-commit". So we need these hunks here, or do this as a preceding in the series. Will update the commit message and keep it as is. Fam
Re: [Qemu-devel] [PATCH v10 00/11] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD
On 2014年01月08日 11:42, Fam Zheng wrote: This series adds for point-in-time snapshot NBD exporting based on blockdev-backup (variant of drive-backup with existing device as target). We get a thin point-in-time snapshot by COW mechanism of drive-backup, and export it through built in NBD server. The steps are as below: 1. (SHELL) qemu-img create -f qcow2 BACKUP.qcow2 (Alternatively we can use -o backing_file=RUNNING-VM.img to omit explicitly providing the size by ourselves, but it's risky because RUNNING-VM.qcow2 is used r/w by guest. Whether or not setting backing file in the image file doesn't matter, as we are going to override the backing hd in the next step) 2. (QMP) blockdev-add backing=source-drive file.driver=file file.filename=BACKUP.qcow2 id=target0 if=none driver=qcow2 (where source-drive is the running BlockDriverState name for RUNNING-VM.img. This patch implements "backing=" option to override backing_hd for added drive) 3. (QMP) blockdev-backup device=source-drive sync=none target=target0 (this is the QMP command introduced by this series, which use a named device as target of drive-backup) 4. (QMP) nbd-server-add device=target0 When image fleecing done: 1. (QMP) block-job-cancel device=source-drive 2. (HMP) drive_del target0 3. (SHELL) rm BACKUP.qcow2 v8 -> v10: Rebased to qemu.git. Address Stefan's comments: [01/11] block: Add BlockOpType enum Change enum definition as internal. [05/11] block: Add bdrv_set_backing_hd() Set bs->backing_file and bs->backing_format. [06/11] block: Add backing_blocker in BlockDriverState Reuse bdrv_set_backing_hd(). [07/11] block: Parse "backing" option to reference existing BDS Fix use-after-free. Check for "backing=" and "backing.file=" conflict. Remove unintended bdrv_swap hunks. Actually it is not unintended, this breaks qemu-iotests 040. The asserts have to be removed to allow block commit. Will respin, sorry for posting three revisions in a row. Fam [08/11] block: Support dropping active in bdrv_drop_intermediate Fix function comment. [09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images Fam Zheng (11): block: Add BlockOpType enum block: Introduce op_blockers to BlockDriverState block: Replace in_use with operation blocker block: Move op_blocker check from block_job_create to its caller block: Add bdrv_set_backing_hd() block: Add backing_blocker in BlockDriverState block: Parse "backing" option to reference existing BDS block: Support dropping active in bdrv_drop_intermediate stream: Use bdrv_drop_intermediate and drop close_unused_images qmp: Add command 'blockdev-backup' block: Allow backup on referenced named BlockDriverState block-migration.c | 7 +- block.c | 306 +++- block/backup.c | 21 +++ block/commit.c | 1 + block/stream.c | 28 +--- blockdev.c | 70 +++-- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 ++- include/block/block.h | 29 +++- include/block/block_int.h | 9 +- include/block/blockjob.h| 3 + qapi-schema.json| 49 +++ qmp-commands.hx | 44 ++ 13 files changed, 446 insertions(+), 154 deletions(-)
[Qemu-devel] [PATCH v11 03/11] block: Replace in_use with operation blocker
This drops BlockDriverState.in_use with op_blockers: - Call bdrv_op_block_all in place of bdrv_set_in_use(bs, 1). - Call bdrv_op_unblock_all in place of bdrv_set_in_use(bs, 0). - Check bdrv_op_is_blocked() in place of bdrv_in_use(bs). The specific types are used, e.g. in place of starting block backup, bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP, ...). - Check bdrv_op_blocker_is_empty() in place of assert(!bs->in_use). Note: there is only bdrv_op_block_all and bdrv_op_unblock_all callers at this moment. So although the checks are specific to op types, this changes can still be seen as identical logic with previously with in_use. The difference is error message are improved because of blocker error info. Signed-off-by: Fam Zheng --- block-migration.c | 7 +-- block.c | 24 +++- blockdev.c | 15 ++- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 --- include/block/block.h | 2 -- include/block/block_int.h | 1 - include/block/blockjob.h| 3 +++ 8 files changed, 42 insertions(+), 43 deletions(-) diff --git a/block-migration.c b/block-migration.c index 897fdba..bf9a25f 100644 --- a/block-migration.c +++ b/block-migration.c @@ -59,6 +59,7 @@ typedef struct BlkMigDevState { unsigned long *aio_bitmap; int64_t completed_sectors; BdrvDirtyBitmap *dirty_bitmap; +Error *blocker; } BlkMigDevState; typedef struct BlkMigBlock { @@ -346,7 +347,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) bmds->completed_sectors = 0; bmds->shared_base = block_mig_state.shared_base; alloc_aio_bitmap(bmds); -bdrv_set_in_use(bs, 1); +error_setg(&bmds->blocker, "block device is in use by migration"); +bdrv_op_block_all(bs, bmds->blocker); bdrv_ref(bs); block_mig_state.total_sector_sum += sectors; @@ -584,7 +586,8 @@ static void blk_mig_cleanup(void) blk_mig_lock(); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); -bdrv_set_in_use(bmds->bs, 0); +bdrv_op_unblock_all(bmds->bs, bmds->blocker); +error_free(bmds->blocker); bdrv_unref(bmds->bs); g_free(bmds->aio_bitmap); g_free(bmds); diff --git a/block.c b/block.c index 91cda9c..b122154 100644 --- a/block.c +++ b/block.c @@ -1621,7 +1621,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->refcnt = bs_src->refcnt; /* job */ -bs_dest->in_use = bs_src->in_use; bs_dest->job= bs_src->job; /* keep the same entry in bdrv_states */ @@ -1653,7 +1652,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1672,7 +1671,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); -assert(bs_new->in_use == 0); +assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1709,7 +1708,7 @@ static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); -assert(!bs->in_use); +assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -1891,7 +1890,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } -if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || +bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { return -EBUSY; } @@ -2924,8 +2924,9 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; -if (bdrv_in_use(bs)) +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { return -EBUSY; +} ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -4705,17 +4706,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } -void bdrv_set_in_use(BlockDriverState *bs, int in_use) -{ -assert(bs->in_use
[Qemu-devel] [PATCH v11 11/11] block: Allow backup on referenced named BlockDriverState
Drive backup is a read only operation on source bs. We want to allow this specific case to enable image-fleecing. Note that when image-fleecing job starts, the job still add its blocker to source bs, and any other operation on it will be blocked by that. Signed-off-by: Fam Zheng --- block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block.c b/block.c index 1ceb852..c2e91a6 100644 --- a/block.c +++ b/block.c @@ -988,6 +988,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) /* Otherwise we won't be able to commit due to check in bdrv_commit */ bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, bs->backing_blocker); +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), -- 1.8.5.1
[Qemu-devel] [PATCH v11 07/11] block: Parse "backing" option to reference existing BDS
Now it's safe to allow reference for backing_hd in the interface. Signed-off-by: Fam Zheng --- block.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 2ccc006..ca4e362 100644 --- a/block.c +++ b/block.c @@ -1199,12 +1199,34 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* If there is a backing file, use it */ if ((flags & BDRV_O_NO_BACKING) == 0) { QDict *backing_options; +const char *backing_name; +BlockDriverState *backing_hd; +backing_name = qdict_get_try_str(options, "backing"); qdict_extract_subqdict(options, &backing_options, "backing."); -ret = bdrv_open_backing_file(bs, backing_options, &local_err); -if (ret < 0) { + +if (backing_name && qdict_size(backing_options)) { +error_setg(&local_err, + "Option \"backing\" and \"backing.*\" cannot be " + "used together"); +ret = -EINVAL; goto close_and_fail; } +if (backing_name) { +backing_hd = bdrv_find(backing_name); +if (!backing_hd) { +error_set(&local_err, QERR_DEVICE_NOT_FOUND, backing_name); +ret = -ENOENT; +goto close_and_fail; +} +qdict_del(options, "backing"); +bdrv_set_backing_hd(bs, backing_hd); +} else { +ret = bdrv_open_backing_file(bs, backing_options, &local_err); +if (ret < 0) { +goto close_and_fail; +} +} } /* Check if any unknown options were used */ -- 1.8.5.1
[Qemu-devel] [PATCH v11 00/11] Drop in_use from BlockDriverState and enable point-in-time snapshot exporting over NBD
This series adds for point-in-time snapshot NBD exporting based on blockdev-backup (variant of drive-backup with existing device as target). We get a thin point-in-time snapshot by COW mechanism of drive-backup, and export it through built in NBD server. The steps are as below: 1. (SHELL) qemu-img create -f qcow2 BACKUP.qcow2 (Alternatively we can use -o backing_file=RUNNING-VM.img to omit explicitly providing the size by ourselves, but it's risky because RUNNING-VM.qcow2 is used r/w by guest. Whether or not setting backing file in the image file doesn't matter, as we are going to override the backing hd in the next step) 2. (QMP) blockdev-add backing=source-drive file.driver=file file.filename=BACKUP.qcow2 id=target0 if=none driver=qcow2 (where source-drive is the running BlockDriverState name for RUNNING-VM.img. This patch implements "backing=" option to override backing_hd for added drive) 3. (QMP) blockdev-backup device=source-drive sync=none target=target0 (this is the QMP command introduced by this series, which use a named device as target of drive-backup) 4. (QMP) nbd-server-add device=target0 When image fleecing done: 1. (QMP) block-job-cancel device=source-drive 2. (HMP) drive_del target0 3. (SHELL) rm BACKUP.qcow2 v8 -> v11: Rebased to qemu.git. Address Stefan's comments: (sorry for sending 3 revisions in a day) [01/11] block: Add BlockOpType enum Change enum definition as internal. [05/11] block: Add bdrv_set_backing_hd() Set bs->backing_file and bs->backing_format. [06/11] block: Add backing_blocker in BlockDriverState Reuse bdrv_set_backing_hd(). [07/11] block: Parse "backing" option to reference existing BDS Update commit message about bdrv_swap assertion removal. Fix use-after-free. Check for "backing=" and "backing.file=" conflict. [08/11] block: Support dropping active in bdrv_drop_intermediate Fix function comment. [09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images Fam Zheng (11): block: Add BlockOpType enum block: Introduce op_blockers to BlockDriverState block: Replace in_use with operation blocker block: Move op_blocker check from block_job_create to its caller block: Add bdrv_set_backing_hd() block: Add backing_blocker in BlockDriverState block: Parse "backing" option to reference existing BDS block: Support dropping active in bdrv_drop_intermediate stream: Use bdrv_drop_intermediate and drop close_unused_images qmp: Add command 'blockdev-backup' block: Allow backup on referenced named BlockDriverState block-migration.c | 7 +- block.c | 304 +++- block/backup.c | 21 +++ block/commit.c | 1 + block/stream.c | 28 +--- blockdev.c | 70 +++-- blockjob.c | 14 +- hw/block/dataplane/virtio-blk.c | 19 ++- include/block/block.h | 29 +++- include/block/block_int.h | 9 +- include/block/blockjob.h| 3 + qapi-schema.json| 49 +++ qmp-commands.hx | 44 ++ 13 files changed, 444 insertions(+), 154 deletions(-) -- 1.8.5.1
[Qemu-devel] [PATCH v11 09/11] stream: Use bdrv_drop_intermediate and drop close_unused_images
This reuses the new bdrv_drop_intermediate. Signed-off-by: Fam Zheng --- block/stream.c | 28 +--- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/block/stream.c b/block/stream.c index 46bec7d..9cdcf0e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -51,32 +51,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } -static void close_unused_images(BlockDriverState *top, BlockDriverState *base, -const char *base_id) -{ -BlockDriverState *intermediate; -intermediate = top->backing_hd; - -/* Must assign before bdrv_delete() to prevent traversing dangling pointer - * while we delete backing image instances. - */ -top->backing_hd = base; - -while (intermediate) { -BlockDriverState *unused; - -/* reached base */ -if (intermediate == base) { -break; -} - -unused = intermediate; -intermediate = intermediate->backing_hd; -unused->backing_hd = NULL; -bdrv_unref(unused); -} -} - static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; @@ -190,7 +164,7 @@ wait: } } ret = bdrv_change_backing_file(bs, base_id, base_fmt); -close_unused_images(bs, base, base_id); +bdrv_drop_intermediate(bs, bs->backing_hd, base); } qemu_vfree(buf); -- 1.8.5.1
[Qemu-devel] [PATCH v11 02/11] block: Introduce op_blockers to BlockDriverState
BlockDriverState.op_blockers is an array of lists with BLOCK_OP_TYPE_MAX elements. Each list is a list of blockers of an operation type (BlockOpType), that marks this BDS as currently blocked for a certain type of operation with reason errors stored in the list. The rule of usage is: * BDS user who wants to take an operation should check if there's any blocker of the type with bdrv_op_is_blocked(). * BDS user who wants to block certain types of operation, should call bdrv_op_block (or bdrv_op_block_all to block all types of operations, which is similar to the existing bdrv_set_in_use()). * A blocker is only referenced by op_blockers, so the lifecycle is managed by caller, and shouldn't be lost until unblock, so typically a caller does these: - Allocate a blocker with error_setg or similar, call bdrv_op_block() to block some operations. - Hold the blocker, do his job. - Unblock operations that it blocked, with the same reason pointer passed to bdrv_op_unblock(). - Release the blocker with error_free(). Signed-off-by: Fam Zheng --- block.c | 71 +++ include/block/block.h | 7 + include/block/block_int.h | 5 3 files changed, 83 insertions(+) diff --git a/block.c b/block.c index 64e7d22..91cda9c 100644 --- a/block.c +++ b/block.c @@ -1627,6 +1627,8 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* keep the same entry in bdrv_states */ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name), bs_src->device_name); +memcpy(bs_dest->op_blockers, bs_src->op_blockers, + sizeof(bs_dest->op_blockers)); bs_dest->list = bs_src->list; } @@ -4634,6 +4636,75 @@ void bdrv_unref(BlockDriverState *bs) } } +struct BdrvOpBlocker { +Error *reason; +QLIST_ENTRY(BdrvOpBlocker) list; +}; + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) +{ +BdrvOpBlocker *blocker; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); +if (!QLIST_EMPTY(&bs->op_blockers[op])) { +blocker = QLIST_FIRST(&bs->op_blockers[op]); +if (errp) { +*errp = error_copy(blocker->reason); +} +return true; +} +return false; +} + +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) +{ +BdrvOpBlocker *blocker; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + +blocker = g_malloc0(sizeof(BdrvOpBlocker)); +blocker->reason = reason; +QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); +} + +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) +{ +BdrvOpBlocker *blocker, *next; +assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); +QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { +if (blocker->reason == reason) { +QLIST_REMOVE(blocker, list); +g_free(blocker); +} +} +} + +void bdrv_op_block_all(BlockDriverState *bs, Error *reason) +{ +int i; +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +bdrv_op_block(bs, i, reason); +} +} + +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) +{ +int i; +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +bdrv_op_unblock(bs, i, reason); +} +} + +bool bdrv_op_blocker_is_empty(BlockDriverState *bs) +{ +int i; + +for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { +if (!QLIST_EMPTY(&bs->op_blockers[i])) { +return false; +} +} +return true; +} + void bdrv_set_in_use(BlockDriverState *bs, int in_use) { assert(bs->in_use != in_use); diff --git a/include/block/block.h b/include/block/block.h index 2bc39fe..e6073e7 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -461,6 +461,13 @@ void bdrv_unref(BlockDriverState *bs); void bdrv_set_in_use(BlockDriverState *bs, int in_use); int bdrv_in_use(BlockDriverState *bs); +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + #ifdef CONFIG_LINUX_AIO int raw_get_aio_fd(BlockDriverState *bs); #else diff --git a/include/block/block_int.h b/include/block/block_int.h index 8b132d7..458acd6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -252,6 +252,8 @@ typedef struct BlockLimits { int opt_transfer_length; } BlockLimits; +typedef struct BdrvOpBlocker BdrvOpBlocker; + /* * Note: the function bdrv_append() copies and swaps contents of * BlockDriverStates,
[Qemu-devel] [PATCH v11 06/11] block: Add backing_blocker in BlockDriverState
This makes use of op_blocker and blocks all the operations except for commit target, on each BlockDriverState->backing_hd. The asserts for op_blocker in bdrv_swap are removed because with this change, the target of block commit has at least the backing blocker of its child, so the assertion is not true. Callers should do their check. Signed-off-by: Fam Zheng --- block.c | 18 ++ include/block/block_int.h | 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index ff25749..2ccc006 100644 --- a/block.c +++ b/block.c @@ -961,13 +961,22 @@ fail: void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { if (bs->backing_hd) { +assert(error_is_set(&bs->backing_blocker)); +bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); bdrv_unref(bs->backing_hd); +} else if (backing_hd) { +error_setg(&bs->backing_blocker, + "device is used as backing hd of '%s'", + bs->device_name); } bs->backing_hd = backing_hd; if (!backing_hd) { bs->backing_file[0] = '\0'; bs->backing_format[0] = '\0'; +if (error_is_set(&bs->backing_blocker)) { +error_free(bs->backing_blocker); +} return; } pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); @@ -975,6 +984,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) backing_hd->drv ? backing_hd->drv->format_name : ""); bdrv_ref(bs->backing_hd); +bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); +/* Otherwise we won't be able to commit due to check in bdrv_commit */ +bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, +bs->backing_blocker); pstrcpy(bs->backing_file, sizeof(bs->backing_file), bs->backing_hd->file->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1481,8 +1494,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_set_backing_hd(bs, NULL); } bs->drv->bdrv_close(bs); g_free(bs->opaque); @@ -1674,7 +1686,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); -assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1693,7 +1704,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); -assert(bdrv_op_blocker_is_empty(bs_new)); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); diff --git a/include/block/block_int.h b/include/block/block_int.h index 2f6556d..1ac17d5 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -341,6 +341,9 @@ struct BlockDriverState { BlockJob *job; QDict *options; + +/* The error object in use for blocking operations on backing_hd */ +Error *backing_blocker; }; int get_tmp_filename(char *filename, int size); -- 1.8.5.1
[Qemu-devel] [PATCH v11 08/11] block: Support dropping active in bdrv_drop_intermediate
Dropping intermediate could be useful both for commit and stream, and BDS refcnt plus bdrv_swap could do most of the job nicely. It also needs to work with op blockers. Signed-off-by: Fam Zheng --- block.c| 145 ++--- block/commit.c | 1 + 2 files changed, 66 insertions(+), 80 deletions(-) diff --git a/block.c b/block.c index ca4e362..1ceb852 100644 --- a/block.c +++ b/block.c @@ -2188,114 +2188,99 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, return overlay; } -typedef struct BlkIntermediateStates { -BlockDriverState *bs; -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; -} BlkIntermediateStates; - - /* - * Drops images above 'base' up to and including 'top', and sets the image - * above 'top' to have base as its backing file. + * Drops images above 'base' up to and including 'top', and sets new 'base' + * as backing_hd of top_overlay (the image orignally has 'top' as backing + * file). top_overlay may be NULL if 'top' is active, no such update needed. + * Requires that the top_overlay to 'top' is opened r/w. * - * Requires that the overlay to 'top' is opened r/w, so that the backing file - * information in 'bs' can be properly updated. + * 1) This will convert the following chain: * - * E.g., this will convert the following chain: - * bottom <- base <- intermediate <- top <- active + * ... <- base <- ... <- top <- overlay <-... <- active * * to * - * bottom <- base <- active + * ... <- base <- overlay <- active * - * It is allowed for bottom==base, in which case it converts: + * 2) It is allowed for bottom==base, in which case it converts: * - * base <- intermediate <- top <- active + * base <- ... <- top <- overlay <- ... <- active * * to * - * base <- active + * base <- overlay <- active + * + * 2) It also allows active==top, in which case it converts: + * + * ... <- base <- ... <- top (active) + * + * to + * + * ... <- base == active == top + * + * i.e. only base and lower remains: *top == *base when return. + * + * 3) If base==NULL, it will drop all the BDS below overlay and set its + * backing_hd to NULL. I.e.: + * + * base(NULL) <- ... <- overlay <- ... <- active + * + * to * - * Error conditions: - * if active == top, that is considered an error + * overlay <- ... <- active * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base) { -BlockDriverState *intermediate; -BlockDriverState *base_bs = NULL; -BlockDriverState *new_top_bs = NULL; -BlkIntermediateStates *intermediate_state, *next; -int ret = -EIO; - -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; -QSIMPLEQ_INIT(&states_to_delete); - -if (!top->drv || !base->drv) { -goto exit; -} - -new_top_bs = bdrv_find_overlay(active, top); +BlockDriverState *drop_start, *overlay; +int ret = -EINVAL; -if (new_top_bs == NULL) { -/* we could not find the image above 'top', this is an error */ +if (!top->drv || (base && !base->drv)) { goto exit; } - -/* special case of new_top_bs->backing_hd already pointing to base - nothing - * to do, no intermediate images */ -if (new_top_bs->backing_hd == base) { +if (top == base) { ret = 0; -goto exit; -} - -intermediate = top; - -/* now we will go down through the list, and add each BDS we find - * into our deletion queue, until we hit the 'base' - */ -while (intermediate) { -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); -intermediate_state->bs = intermediate; -QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); - -if (intermediate->backing_hd == base) { -base_bs = intermediate->backing_hd; -break; +} else if (top == active) { +assert(base); +drop_start = active->backing_hd; +bdrv_swap(active, base); +base->backing_hd = NULL; +bdrv_unref(drop_start); +ret = 0; +} else { +/* If there's an overlay, its backing_hd points to top's BDS now, + * the top image is dropped but this BDS structure is kept and swapped + * with base, this way we keep the pointers valid after dropping top */ +overlay = bdrv_find_overlay(active, top); +if (!overlay) { +goto exit; +} +if (base) { +ret = bdrv_change_backing_file(overlay, base->filename, + base->drv->format_
[Qemu-devel] [PATCH v11 04/11] block: Move op_blocker check from block_job_create to its caller
It makes no sense to check for "any" blocker on bs, we are here only because of the mechanical conversion from in_use to op_blockers. Remove it now, and let the callers check specific operation types. Backup and mirror already have it, add checker to stream and commit. Signed-off-by: Fam Zheng --- blockdev.c | 8 blockjob.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 9d36775..7f305d8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1754,6 +1754,10 @@ void qmp_block_stream(const char *device, bool has_base, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { +return; +} + if (base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { @@ -1794,6 +1798,10 @@ void qmp_block_commit(const char *device, return; } +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { +return; +} + /* default top_bs is the active layer */ top_bs = bs; diff --git a/blockjob.c b/blockjob.c index f1ff036..21e21c0 100644 --- a/blockjob.c +++ b/blockjob.c @@ -41,7 +41,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, { BlockJob *job; -if (bs->job || !bdrv_op_blocker_is_empty(bs)) { +if (bs->job) { error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } -- 1.8.5.1
[Qemu-devel] [PATCH v11 10/11] qmp: Add command 'blockdev-backup'
Similar to drive-backup, but this command uses a device id as target instead of creating/opening an image file. Also add blocker on target bs, since the target is also a named device now. Signed-off-by: Fam Zheng --- block/backup.c | 21 + blockdev.c | 47 +++ qapi-schema.json | 49 + qmp-commands.hx | 44 4 files changed, 161 insertions(+) diff --git a/block/backup.c b/block/backup.c index 0198514..c8fe1a9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -339,6 +339,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); +bdrv_op_unblock_all(target, job->common.blocker); bdrv_unref(target); block_job_completed(&job->common, ret); @@ -364,6 +365,24 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +if (!bdrv_is_inserted(bs)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bs->device_name); +return; +} + +if (!bdrv_is_inserted(target)) { +error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, target->device_name); +return; +} + +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +return; +} + +if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { +return; +} + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -377,6 +396,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +bdrv_op_block_all(target, job->common.blocker); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; diff --git a/blockdev.c b/blockdev.c index 7f305d8..5627e5d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1872,6 +1872,8 @@ void qmp_drive_backup(const char *device, const char *target, return; } +/* Although backup_run has this check too, we need to use bs->drv below, so + * do an early check redundantly. */ if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); return; @@ -1888,6 +1890,7 @@ void qmp_drive_backup(const char *device, const char *target, } } +/* Early check to avoid creating target */ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { return; } @@ -1946,6 +1949,50 @@ void qmp_drive_backup(const char *device, const char *target, } } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ +BlockDriverState *bs; +BlockDriverState *target_bs; +Error *local_err = NULL; + +if (!has_speed) { +speed = 0; +} +if (!has_on_source_error) { +on_source_error = BLOCKDEV_ON_ERROR_REPORT; +} +if (!has_on_target_error) { +on_target_error = BLOCKDEV_ON_ERROR_REPORT; +} + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +target_bs = bdrv_find(target); +if (!target_bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, target); +return; +} + +bdrv_ref(target_bs); +backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); +if (local_err != NULL) { +bdrv_unref(target_bs); +error_propagate(errp, local_err); +} +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, diff --git a/qapi-schema.json b/qapi-schema.json index c3c939c..2b5b9af 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1819,6 +1819,40 @@ '*on-target-error': 'BlockdevOnError' } } ## +# @BlockdevBackup +# +# @device: the name of the device which should be copied. +# +# @target: the name of the backup target device. +# +# @sync: what parts of the disk image should be copied to the destination +#(all the disk, only the sectors allocated in the topmost image, or +#only new I/O). +# +# @speed: #optional the maximum speed, in bytes per second. +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block devi
[Qemu-devel] [PATCH v11 01/11] block: Add BlockOpType enum
This adds the enum of all the operations that can be taken on a block device. Signed-off-by: Fam Zheng --- include/block/block.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index 36efaea..2bc39fe 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -151,6 +151,25 @@ typedef struct BDRVReopenState { void *opaque; } BDRVReopenState; +/* + * Block operation types + */ +typedef enum BlockOpType { +BLOCK_OP_TYPE_BACKUP_SOURCE, +BLOCK_OP_TYPE_BACKUP_TARGET, +BLOCK_OP_TYPE_CHANGE, +BLOCK_OP_TYPE_COMMIT, +BLOCK_OP_TYPE_DATAPLANE, +BLOCK_OP_TYPE_DRIVE_DEL, +BLOCK_OP_TYPE_EJECT, +BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, +BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, +BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, +BLOCK_OP_TYPE_MIRROR, +BLOCK_OP_TYPE_RESIZE, +BLOCK_OP_TYPE_STREAM, +BLOCK_OP_TYPE_MAX, +} BlockOpType; void bdrv_iostatus_enable(BlockDriverState *bs); void bdrv_iostatus_reset(BlockDriverState *bs); -- 1.8.5.1
[Qemu-devel] [PATCH v11 05/11] block: Add bdrv_set_backing_hd()
This is the common but non-trivial steps to assign or change the backing_hd of BDS. Signed-off-by: Fam Zheng --- block.c | 34 -- include/block/block.h | 1 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index b122154..ff25749 100644 --- a/block.c +++ b/block.c @@ -958,6 +958,29 @@ fail: return ret; } +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +{ +if (bs->backing_hd) { +bdrv_unref(bs->backing_hd); +} + +bs->backing_hd = backing_hd; +if (!backing_hd) { +bs->backing_file[0] = '\0'; +bs->backing_format[0] = '\0'; +return; +} +pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_file), +backing_hd->drv ? backing_hd->drv->format_name : ""); +bdrv_ref(bs->backing_hd); + +pstrcpy(bs->backing_file, sizeof(bs->backing_file), +bs->backing_hd->file->filename); +pstrcpy(bs->backing_format, sizeof(bs->backing_format), +bs->backing_hd->drv->format_name); +} + /* * Opens the backing file for a BlockDriverState if not yet open * @@ -971,6 +994,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) char backing_filename[PATH_MAX]; int back_flags, ret; BlockDriver *back_drv = NULL; +BlockDriverState *backing_hd; Error *local_err = NULL; if (bs->backing_hd != NULL) { @@ -994,7 +1018,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) sizeof(backing_filename)); } -bs->backing_hd = bdrv_new(""); +backing_hd = bdrv_new(""); if (bs->backing_format[0] != '\0') { back_drv = bdrv_find_format(bs->backing_format); @@ -1004,20 +1028,18 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ); -ret = bdrv_open(bs->backing_hd, +ret = bdrv_open(backing_hd, *backing_filename ? backing_filename : NULL, options, back_flags, back_drv, &local_err); if (ret < 0) { -bdrv_unref(bs->backing_hd); -bs->backing_hd = NULL; +bdrv_unref(backing_hd); bs->open_flags |= BDRV_O_NO_BACKING; error_setg(errp, "Could not open backing file: %s", error_get_pretty(local_err)); error_free(local_err); return ret; } -pstrcpy(bs->backing_file, sizeof(bs->backing_file), -bs->backing_hd->file->filename); +bdrv_set_backing_hd(bs, backing_hd); return 0; } diff --git a/include/block/block.h b/include/block/block.h index 5339f92..4ec0069 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -204,6 +204,7 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, QDict *options, int flags, Error **errp); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, int flags, BlockDriver *drv, Error **errp); -- 1.8.5.1
Re: [Qemu-devel] [PATCH] block: fix backing file segfault
Hi Peter, Thank your for catching this. On 2014年01月09日 03:43, Peter Feiner wrote: When a backing file is opened such that (1) a protocol is directly used as the block driver and (2) the block driver has bdrv_file_open, bdrv_open_backing_file segfaults. The problem arises because bdrv_open_common returns without setting bd->backing_hd->file. To effect (1), you seem to have to use the -F flag in qemu-img. There are several block drivers that satisfy (2), such as "file" and "nbd". Here are some concrete examples: #!/bin/bash echo Test file format ./qemu-img create -f file base.file 1m ./qemu-img create -f qcow2 -F file -o backing_file=base.file\ file-overlay.qcow2 ./qemu-img convert -O raw file-overlay.qcow2 file-convert.raw echo Test nbd format SOCK=$PWD/nbd.sock ./qemu-img create -f raw base.raw 1m ./qemu-nbd -t -k $SOCK base.raw & trap "kill $!" EXIT while ! test -e $SOCK; do sleep 1; done ./qemu-img create -f qcow2 -F nbd -o backing_file=nbd:unix:$SOCK\ nbd-overlay.qcow2 ./qemu-img convert -O raw nbd-overlay.qcow2 nbd-convert.raw It would be nice if you can add this as a test case under tests/qemu-iotests. Without this patch, the two qemu-img convert commands segfault. This is a regression that was introduced in v1.7 by dbecebddfa4932d1c83915bcb9b5ba5984eb91be. Signed-off-by: Peter Feiner --- block.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 64e7d22..a4a172d 100644 --- a/block.c +++ b/block.c @@ -1016,8 +1016,9 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); return ret; } -pstrcpy(bs->backing_file, sizeof(bs->backing_file), -bs->backing_hd->file->filename); +if (bs->backing_hd->file) +pstrcpy(bs->backing_file, sizeof(bs->backing_file), +bs->backing_hd->file->filename); Braces are mandatory for if statements (scripts/checkpatch.pl can check the coding style for you). Thanks, Fam
[Qemu-devel] [PATCH 3/9] block: Add bdrv_dirty_bitmap_make_anon
This will unset the name of dirty bitmap. Signed-off-by: Fam Zheng --- block.c | 5 + include/block/block.h | 1 + 2 files changed, 6 insertions(+) diff --git a/block.c b/block.c index 16ef61b..f7e6851 100644 --- a/block.c +++ b/block.c @@ -4545,6 +4545,11 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, return NULL; } +void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ +bitmap->name[0] = '\0'; +} + BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity, const char *name, diff --git a/include/block/block.h b/include/block/block.h index 8dafa42..6c88f7a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -430,6 +430,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, Error **errp); BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name); +void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); -- 1.8.5.2
[Qemu-devel] [PATCH 0/9] QMP: Introduce incremental drive-backup with in-memory dirty bitmap
This implements incremental backup. A few new QMP commands related to dirty bitmap are added: dirty-bitmap-add * dirty-bitmap-disable * dirty-bitmap-remove (*: also supported as transactions) As their name implies, they manipulate a block device's dirty bitmap. This doesn't interfere with dirty bitmap used for migration, backup, mirror, etc, which don't have a name and are invisible to user. Only named bitmaps (created by dirty-bitmap-add) can be disabled/removed by user. They are added to support "user controlled write tracking", so as to determine the range of date for incremental backup. A new sync mode for drive-backup is introduced: drive-backup device=.. mode=.. sync=dirty-bitmap bitmap=bitmap0 Which will scan dirty bitmap "bitmap0" and only copy all dirty sectors to target. Now, let's see the usage with a simple example: # Start the guest vm = VM() vm.start() # Fake some guest writes with "qemu-io", this is before creating dirty # bitmap, so it won't be copied vm.hmp('qemu-io ide0-hd0 "write -P 0xa 512k 1M"') # Create a dirty bitmap to track writes vm.qmp("dirty-bitmap-add", device="ide0-hd0", name="dirty-0") # Fake some more guest writes with "qemu-io", this will be copied vm.hmp('qemu-io ide0-hd0 "write -P 0xa 512M 1M"') # Now "disable" the first dirty bitmap, do the backup according to it, # at meantime continue to track dirty with a new dirty bitmap vm.qmp("transaction", actions=[ { 'type': 'dirty-bitmap-disable', 'data': { 'device': 'ide0-hd0', 'name': 'dirty-0' } }, { 'type': 'dirty-bitmap-add', 'data': { 'device': 'ide0-hd0', 'name': 'dirty-1' } }, { 'type': 'drive-backup', 'data': { 'device': 'ide0-hd0', 'target': '/tmp/incre.qcow2', 'bitmap': 'dirty-0', 'sync': 'dirty-bitmap' } } ]) # Once backup job started, the dirty bitmap can be removed (actually only # hidden from user since it is still referenced by block job vm.qmp("dirty-bitmap-remove", device="ide0-hd0", name="dirty-0") Wait the block job to complete, then let's check the target image to see what data is copied: ./qemu-img map /tmp/incre.qcow2 Offset Length Mapped to File 0x2000 0x100x16/tmp/incre.qcow2 Yes, only the data written after "dirty0" creation is copied. If this interface looks good, test cases will be included in the next revision. Also, it's quite easy to use an rbd server or other protocols to do the remote backup over network. P.S. Persistent dirty bitmap could be built on top of this series, but is not yet implemented, because the storage format and integrity measures are not quite clear for now. The discussion is still open and any questions, ideas, use cases and concerns are all welcome! Fam Fam Zheng (9): block: Introduce reference count for dirty bitmaps qapi: Add optional field "name" to block dirty bitmap block: Add bdrv_dirty_bitmap_make_anon qmp: Add dirty-bitmap-add and dirty-bitmap-remove block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap block: Introduce bdrv_dirty_bitmap_granularity() block: Add support of "dirty-bitmap" sync mode qmp: Add dirty-bitmap-disable command qapi: Add transaction support to dirty-bitmap-{add,disable} block-migration.c | 3 +- block.c | 79 +-- block/backup.c| 34 +- block/mirror.c| 6 +- blockdev.c| 156 +- hmp.c | 3 +- include/block/block.h | 13 +++- include/block/block_int.h | 2 + qapi-schema.json | 79 +-- qmp-commands.hx | 61 +- 10 files changed, 416 insertions(+), 20 deletions(-) -- 1.8.5.2
[Qemu-devel] [PATCH 4/9] qmp: Add dirty-bitmap-add and dirty-bitmap-remove
The new command pair is added to manage user created dirty bitmap. The dirty bitmap's name is mandatory and must be unique for the same device, but different devices can have bitmaps with the same names. Signed-off-by: Fam Zheng --- blockdev.c | 60 qapi-schema.json | 45 ++ qmp-commands.hx | 49 + 3 files changed, 154 insertions(+) diff --git a/blockdev.c b/blockdev.c index 2c3242b..dd0f2ac 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1633,6 +1633,66 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, } } +void qmp_dirty_bitmap_add(const char *device, const char *name, + bool has_granularity, int64_t granularity, + Error **errp) +{ +BlockDriverState *bs; +BdrvDirtyBitmap *bitmap; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +if (!name || name[0] == '\0') { +error_setg(errp, "Bitmap name cannot be empty"); +return; +} +if (has_granularity) { +if (granularity & (granularity - 1)) { +error_setg(errp, "Granularity must be power of 2"); +return; +} +} else { +granularity = 65536; +} + +bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); +if (!bitmap) { +return; +} +} + +void qmp_dirty_bitmap_remove(const char *device, const char *name, + Error **errp) +{ +BlockDriverState *bs; +BdrvDirtyBitmap *bitmap; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +if (!name || name[0] == '\0') { +error_setg(errp, "Bitmap name cannot be empty"); +return; +} +bitmap = bdrv_find_dirty_bitmap(bs, name); +if (!bitmap) { +error_setg(errp, "Dirty bitmap not found: %s", name); +return; +} + +/* Make it invisible to user in case the following + * bdrv_release_dirty_bitmap doens't free it because of refcnt */ +bdrv_dirty_bitmap_make_anon(bs, bitmap); +bdrv_release_dirty_bitmap(bs, bitmap); +} + int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) { const char *id = qdict_get_str(qdict, "id"); diff --git a/qapi-schema.json b/qapi-schema.json index e91143a..095d6c0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2062,6 +2062,51 @@ '*on-target-error': 'BlockdevOnError' } } ## +# @DirtyBitmap +# +# @device: name of device which the bitmap is tracking +# +# @name: name of the dirty bitmap +# +# @granularity: #optional the bitmap granularity, default is 64k for +# dirty-bitmap-add +# +# Since 2.0 +## +{ 'type': 'DirtyBitmap', + 'data': { 'device': 'str', 'name': 'str', '*granularity': 'int' } } + +## +# @dirty-bitmap-add +# +# Create a dirty bitmap with a name on the device +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If @name is already taken, GenericError with an explaining message +# +# Since 2.0 +## +{'command': 'dirty-bitmap-add', + 'data': 'DirtyBitmap' } + +## +# @dirty-bitmap-remove +# +# Remove a dirty bitmap on the device +# +# Setting granularity has no effect here. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If @name is not found, GenericError with an explaining message +# +# Since 2.0 +## +{'command': 'dirty-bitmap-remove', + 'data': { 'device': 'str', 'name': 'str' } } + +## # @migrate_cancel # # Cancel the current executing migration process. diff --git a/qmp-commands.hx b/qmp-commands.hx index fba15cd..fbbf29c 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1064,6 +1064,55 @@ Example: EQMP { +.name = "dirty-bitmap-add", +.args_type = "device:B,name:s,granularity:i?", +.mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_add, +}, +{ +.name = "dirty-bitmap-remove", +.args_type = "device:B,name:s", +.mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_remove, +}, + +SQMP + +dirty-bitmap-add + + +Create a dirty bitmap with a name on the device, and start tracking the writes. + +Arguments: + +- "device": device name to create dirty bitmap (json-string) +- "name": name of the new dirty bitmap (json-string) +- "granularity": granul
[Qemu-devel] [PATCH 7/9] block: Add support of "dirty-bitmap" sync mode
For "dirty-bitmap" sync mode, the block job will iterate through the given dirty bitmap to decide if a sector needs backup (backup all the dirty clusters and skip clean ones), just as allocation conditions of "top" sync mode. If the dirty bitmap is updated (because of guest writes) while the block job is scanning it, an extra time of copy of written sectors could happen because of overlapping of write interception and dirty bitmap. User should build a transaction to: - Deactive the dirty bitmap. - Start a backup job with the dirty bitmap. - Create another dirty bitmap for continuity of incremental tracking. With coming dirty bitmap transaction patches. Signed-off-by: Fam Zheng --- block/backup.c| 34 +- block/mirror.c| 4 blockdev.c| 6 +- hmp.c | 3 ++- include/block/block_int.h | 2 ++ qapi-schema.json | 10 ++ qmp-commands.hx | 7 --- 7 files changed, 56 insertions(+), 10 deletions(-) diff --git a/block/backup.c b/block/backup.c index 0198514..78ca9b1 100644 --- a/block/backup.c +++ b/block/backup.c @@ -37,6 +37,8 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; BlockDriverState *target; +BdrvDirtyBitmap *sync_bitmap; +int sync_bitmap_gran; MirrorSyncMode sync_mode; RateLimit limit; BlockdevOnError on_source_error; @@ -258,7 +260,7 @@ static void coroutine_fn backup_run(void *opaque) job->common.busy = true; } } else { -/* Both FULL and TOP SYNC_MODE's require copying.. */ +/* FULL, TOP and DIRTY_BITMAP SYNC_MODE's require copying.. */ for (; start < end; start++) { bool error_is_read; @@ -312,7 +314,21 @@ static void coroutine_fn backup_run(void *opaque) if (alloced == 0) { continue; } +} else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) { +int i, dirty = 0; +for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER; + i += job->sync_bitmap_gran) { +if (bdrv_get_dirty(bs, job->sync_bitmap, +start * BACKUP_SECTORS_PER_CLUSTER + i)) { +dirty = 1; +break; +} +} +if (!dirty) { +continue; +} } + /* FULL sync mode we copy the whole drive. */ ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, BACKUP_SECTORS_PER_CLUSTER, &error_is_read); @@ -336,6 +352,9 @@ static void coroutine_fn backup_run(void *opaque) qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); +if (job->sync_bitmap) { +bdrv_release_dirty_bitmap(bs, job->sync_bitmap); +} hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); @@ -346,6 +365,7 @@ static void coroutine_fn backup_run(void *opaque) void backup_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, @@ -364,6 +384,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } +if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP && !sync_bitmap) { +error_setg(errp, "must provide a valid bitmap name for \"dirty-bitmap\"" + "sync mode"); +return; +} + +if (sync_bitmap) { +bdrv_reference_dirty_bitmap(bs, sync_bitmap); +} + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -381,6 +411,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, job->on_target_error = on_target_error; job->target = target; job->sync_mode = sync_mode; +job->sync_bitmap = sync_bitmap; +job->sync_bitmap_gran = bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap); job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); qemu_coroutine_enter(job->common.co, job); diff --git a/block/mirror.c b/block/mirror.c index cc0665b..ccab15a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -617,6 +617,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode; BlockDriverState *base; +if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) { +error_setg(errp, "Sync mode 'dirty-bitmap' not
[Qemu-devel] [PATCH 5/9] block: Handle error of bdrv_getlength in bdrv_create_dirty_bitmap
bdrv_getlength could fail, check the return value before using it. Signed-off-by: Fam Zheng --- block.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index f7e6851..cc9c530 100644 --- a/block.c +++ b/block.c @@ -4568,7 +4568,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } granularity >>= BDRV_SECTOR_BITS; assert(granularity); -bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); +bitmap_size = bdrv_getlength(bs); +if (bitmap_size < 0) { +error_setg(errp, "could not get length of device"); +return NULL; +} +bitmap_size >>= BDRV_SECTOR_BITS; bitmap = g_malloc0(sizeof(BdrvDirtyBitmap)); bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); bitmap->refcnt = 1; -- 1.8.5.2
[Qemu-devel] [PATCH 9/9] qapi: Add transaction support to dirty-bitmap-{add, disable}
This adds dirty-bitmap-add and dirty-bitmap-disable to transactions. With this, user can stop a dirty bitmap, start backup of it, and start another dirty bitmap atomically, so that the dirty bitmap is tracked incrementally and we don't miss any write. Signed-off-by: Fam Zheng --- blockdev.c | 68 qapi-schema.json | 4 +++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 090a681..3a8fbf2 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1335,6 +1335,64 @@ static void drive_backup_abort(BlkTransactionState *common) } } +static void dirty_bitmap_add_prepare(BlkTransactionState *common, Error **errp) +{ +DirtyBitmap *action; +Error *local_err = NULL; + +action = common->action->dirty_bitmap_add; +qmp_dirty_bitmap_add(action->device, action->name, false, 0, &local_err); +if (error_is_set(&local_err)) { +error_propagate(errp, local_err); +} +} + +static void dirty_bitmap_add_abort(BlkTransactionState *common) +{ +DirtyBitmap *action; +BdrvDirtyBitmap *bm; +BlockDriverState *bs; + +action = common->action->dirty_bitmap_add; +bs = bdrv_find(action->device); +if (bs) { +bm = bdrv_find_dirty_bitmap(bs, action->name); +if (bm) { +bdrv_release_dirty_bitmap(bs, bm); +} +} +} + +static void dirty_bitmap_disable_prepare(BlkTransactionState *common, + Error **errp) +{ +DirtyBitmap *action; +Error *local_err = NULL; + +action = common->action->dirty_bitmap_disable; +qmp_dirty_bitmap_disable(action->device, action->name, + false, 0, &local_err); +if (error_is_set(&local_err)) { +error_propagate(errp, local_err); +} +} + +static void dirty_bitmap_disable_abort(BlkTransactionState *common) +{ +DirtyBitmap *action; +BdrvDirtyBitmap *bitmap; +BlockDriverState *bs; + +action = common->action->dirty_bitmap_disable; +bs = bdrv_find(action->device); +if (bs) { +bitmap = bdrv_find_dirty_bitmap(bs, action->name); +if (bitmap) { +bdrv_enable_dirty_bitmap(bs, bitmap); +} +} +} + static void abort_prepare(BlkTransactionState *common, Error **errp) { error_setg(errp, "Transaction aborted using Abort action"); @@ -1367,6 +1425,16 @@ static const BdrvActionOps actions[] = { .prepare = internal_snapshot_prepare, .abort = internal_snapshot_abort, }, +[TRANSACTION_ACTION_KIND_DIRTY_BITMAP_ADD] = { +.instance_size = sizeof(BlkTransactionState), +.prepare = dirty_bitmap_add_prepare, +.abort = dirty_bitmap_add_abort, +}, +[TRANSACTION_ACTION_KIND_DIRTY_BITMAP_DISABLE] = { +.instance_size = sizeof(BlkTransactionState), +.prepare = dirty_bitmap_disable_prepare, +.abort = dirty_bitmap_disable_abort, +}, }; /* diff --git a/qapi-schema.json b/qapi-schema.json index 8a81026..22cf010 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1843,7 +1843,9 @@ 'blockdev-snapshot-sync': 'BlockdevSnapshot', 'drive-backup': 'DriveBackup', 'abort': 'Abort', - 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal' + 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal', + 'dirty-bitmap-add': 'DirtyBitmap', + 'dirty-bitmap-disable': 'DirtyBitmap' } } ## -- 1.8.5.2
[Qemu-devel] [PATCH 1/9] block: Introduce reference count for dirty bitmaps
A dirty bitmap may be created by user via QMP, then reference by other QMP commands, such as backup. We need reference count machanism to manage the lifecycle of dirty bitmap. This adds bdrv_release_dirty_bitmap and changes bdrv_release_dirty_bitmap to only free the structure when refcnt goes to 0. Signed-off-by: Fam Zheng --- block.c | 17 ++--- include/block/block.h | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 64e7d22..6ad0368 100644 --- a/block.c +++ b/block.c @@ -51,6 +51,7 @@ struct BdrvDirtyBitmap { HBitmap *bitmap; +int refcnt; QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -4543,6 +4544,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity) bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); bitmap = g_malloc0(sizeof(BdrvDirtyBitmap)); bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); +bitmap->refcnt = 1; QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); return bitmap; } @@ -4552,14 +4554,23 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) BdrvDirtyBitmap *bm, *next; QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if (bm == bitmap) { -QLIST_REMOVE(bitmap, list); -hbitmap_free(bitmap->bitmap); -g_free(bitmap); +assert(bitmap->refcnt > 0); +bitmap->refcnt--; +if (bitmap->refcnt == 0) { +QLIST_REMOVE(bitmap, list); +hbitmap_free(bitmap->bitmap); +g_free(bitmap); +} return; } } } +void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ +bitmap->refcnt++; +} + BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) { BdrvDirtyBitmap *bm; diff --git a/include/block/block.h b/include/block/block.h index 36efaea..0c776e3 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -426,6 +426,7 @@ struct HBitmapIter; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -- 1.8.5.2
[Qemu-devel] [PATCH 2/9] qapi: Add optional field "name" to block dirty bitmap
This field will be set for user created dirty bitmap. Also pass in an error pointer to bdrv_create_dirty_bitmap, so when a name is already taken on this BDS, it can report an error message. This is not global check, two BDSes can have dirty bitmap with a common name. Implemented bdrv_find_dirty_bitmap to find a dirty bitmap by name, will be used later when other QMP commands want to reference dirty bitmap by name. Signed-off-by: Fam Zheng --- block-migration.c | 3 ++- block.c | 29 - block/mirror.c| 2 +- include/block/block.h | 7 ++- qapi-schema.json | 4 +++- 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/block-migration.c b/block-migration.c index 897fdba..e6e016a 100644 --- a/block-migration.c +++ b/block-migration.c @@ -315,7 +315,8 @@ static void set_dirty_tracking(void) BlkMigDevState *bmds; QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { -bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE); +bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE, + NULL, NULL); } } diff --git a/block.c b/block.c index 6ad0368..16ef61b 100644 --- a/block.c +++ b/block.c @@ -52,6 +52,7 @@ struct BdrvDirtyBitmap { HBitmap *bitmap; int refcnt; +char name[1024]; QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -4532,19 +4533,43 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity) +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, +const char *name) +{ +BdrvDirtyBitmap *bm; +QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { +if (!strcmp(name, bm->name)) { +return bm; +} +} +return NULL; +} + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + int granularity, + const char *name, + Error **errp) { int64_t bitmap_size; BdrvDirtyBitmap *bitmap; assert((granularity & (granularity - 1)) == 0); +if (name && bdrv_find_dirty_bitmap(bs, name)) { +if (errp) { +error_setg(errp, "Bitmap already exists: %s", name); +} +return NULL; +} granularity >>= BDRV_SECTOR_BITS; assert(granularity); bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); bitmap = g_malloc0(sizeof(BdrvDirtyBitmap)); bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); bitmap->refcnt = 1; +if (name) { +pstrcpy(bitmap->name, sizeof(bitmap->name), name); +} QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); return bitmap; } @@ -4583,6 +4608,8 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) info->count = bdrv_get_dirty_count(bs, bm); info->granularity = ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap)); +info->has_name = bm->name[0] != '\0'; +info->name = g_strdup(bm->name); entry->value = info; *plist = entry; plist = &entry->next; diff --git a/block/mirror.c b/block/mirror.c index 2932bab..cc0665b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -598,7 +598,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->granularity = granularity; s->buf_size = MAX(buf_size, granularity); -s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity); +s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); bdrv_set_enable_write_cache(s->target, true); bdrv_set_on_error(s->target, on_target_error, on_target_error); bdrv_iostatus_enable(s->target); diff --git a/include/block/block.h b/include/block/block.h index 0c776e3..8dafa42 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -424,7 +424,12 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); struct HBitmapIter; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity); +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + int granularity, + const char *name, + Error **errp); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, +const char *name); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_reference_dirty_bitmap(BlockDriverState *b
[Qemu-devel] [PATCH 6/9] block: Introduce bdrv_dirty_bitmap_granularity()
This returns the granularity (in sectors) of dirty bitmap. Signed-off-by: Fam Zheng --- block.c | 6 ++ include/block/block.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/block.c b/block.c index cc9c530..0e59a9a 100644 --- a/block.c +++ b/block.c @@ -4637,6 +4637,12 @@ int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector } } +int bdrv_dirty_bitmap_granularity(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap) +{ +return hbitmap_granularity(bitmap->bitmap); +} + void bdrv_dirty_iter_init(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) { diff --git a/include/block/block.h b/include/block/block.h index 6c88f7a..858baad 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -434,6 +434,8 @@ void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); +int bdrv_dirty_bitmap_granularity(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -- 1.8.5.2
[Qemu-devel] [PATCH 8/9] qmp: Add dirty-bitmap-disable command
This will put the dirty bitmap into a disabled state and no more writes will be tracked. It will be used before backup or writing to persistent file. Signed-off-by: Fam Zheng --- block.c | 15 +++ blockdev.c| 22 ++ include/block/block.h | 2 ++ qapi-schema.json | 16 qmp-commands.hx | 5 + 5 files changed, 60 insertions(+) diff --git a/block.c b/block.c index 0e59a9a..62efb93 100644 --- a/block.c +++ b/block.c @@ -53,6 +53,7 @@ struct BdrvDirtyBitmap { HBitmap *bitmap; int refcnt; char name[1024]; +bool enabled; QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -4580,6 +4581,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, if (name) { pstrcpy(bitmap->name, sizeof(bitmap->name), name); } +bitmap->enabled = true; QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); return bitmap; } @@ -4606,6 +4608,16 @@ void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) bitmap->refcnt++; } +void bdrv_disable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ +bitmap->enabled = false; +} + +void bdrv_enable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ +bitmap->enabled = true; +} + BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) { BdrvDirtyBitmap *bm; @@ -4654,6 +4666,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { +if (!bitmap->enabled) { +continue; +} hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } } diff --git a/blockdev.c b/blockdev.c index ac00562..090a681 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1694,6 +1694,28 @@ void qmp_dirty_bitmap_remove(const char *device, const char *name, bdrv_release_dirty_bitmap(bs, bitmap); } +void qmp_dirty_bitmap_disable(const char *device, const char *name, + bool has_granularity, int64_t granularity, + Error **errp) +{ +BlockDriverState *bs; +BdrvDirtyBitmap *bitmap; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} + +bitmap = bdrv_find_dirty_bitmap(bs, name); +if (!bitmap) { +error_setg(errp, "Dirty bitmap not found: %s", name); +return; +} + +bdrv_disable_dirty_bitmap(bs, bitmap); +} + int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) { const char *id = qdict_get_str(qdict, "id"); diff --git a/include/block/block.h b/include/block/block.h index 858baad..8b9b142 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -433,6 +433,8 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); void bdrv_reference_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_disable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); int bdrv_dirty_bitmap_granularity(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); diff --git a/qapi-schema.json b/qapi-schema.json index eef894c..8a81026 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2109,6 +2109,22 @@ 'data': { 'device': 'str', 'name': 'str' } } ## +# @dirty-bitmap-disable +# +# Disable a dirty bitmap on the device +# +# Setting granularity has no effect here. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If @name is not found, GenericError with an explaining message +# +# Since 2.0 +## +{'command': 'dirty-bitmap-disable', + 'data': 'DirtyBitmap' } + +## # @migrate_cancel # # Cancel the current executing migration process. diff --git a/qmp-commands.hx b/qmp-commands.hx index eaae70e..88b5f58 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1074,6 +1074,11 @@ EQMP .args_type = "device:B,name:s", .mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_remove, }, +{ +.name = "dirty-bitmap-disable", +.args_type = "device:B,name:s", +.mhandler.cmd_new = qmp_marshal_input_dirty_bitmap_disable, +}, SQMP -- 1.8.5.2
Re: [Qemu-devel] [PATCH] block/vmdk: add basic .bdrv_check support
On Mon, 01/13 12:24, Peter Lieven wrote: > this adds a basic vmdk corruption check. it should detect severe > table corruptions and file truncation. > > Signed-off-by: Peter Lieven > --- > block/vmdk.c | 46 ++ > 1 file changed, 46 insertions(+) > > diff --git a/block/vmdk.c b/block/vmdk.c > index c6b60b4..1d66858 100644 > --- a/block/vmdk.c > +++ b/block/vmdk.c > @@ -1918,6 +1918,51 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent > *extent) > return info; > } > > +static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, > + BdrvCheckMode fix) > +{ > +BDRVVmdkState *s = bs->opaque; > +VmdkExtent *extent = NULL; > +int64_t sector_num = 0; > +int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; > +int ret; > +uint64_t cluster_offset; > + > +if (fix) { > +return -ENOTSUP; > +} > + > +for (;;) { > +if (sector_num >= total_sectors) { > +return 0; > +} > +extent = find_extent(s, sector_num, extent); > +if (!extent) { > +fprintf(stderr, "ERROR: could not find extend for sector %ld\n", > +sector_num); > +break; > +} > +ret = get_cluster_offset(bs, extent, NULL, sector_num << > BDRV_SECTOR_BITS, > + 0, &cluster_offset); > +if (ret == VMDK_ERROR) { > +fprintf(stderr, > +"ERROR: could not get cluster_offset for sector %ld\n", > +sector_num); > +break; > +} > +if (ret == VMDK_OK && cluster_offset >= > bdrv_getlength(extent->file)) { > +fprintf(stderr, > +"ERROR: cluster offset for sector %ld points after > EOF\n", > +sector_num); > +break; > +} > +sector_num += extent->cluster_sectors; > +} > + > +result->corruptions++; > +return 0; > +} > + > static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) > { > int i; > @@ -1991,6 +2036,7 @@ static BlockDriver bdrv_vmdk = { > .instance_size= sizeof(BDRVVmdkState), > .bdrv_probe = vmdk_probe, > .bdrv_open= vmdk_open, > +.bdrv_check = vmdk_check, > .bdrv_reopen_prepare = vmdk_reopen_prepare, > .bdrv_read= vmdk_co_read, > .bdrv_write = vmdk_co_write, Works for me. Thank. And BTW, is it worth comparing with the "grain_offset" header field? This should be the first data cluster's offset. When you have a fresh image with no data cluster allocated, and it's truncated, this patch doesn't catch it. Anyway, Reviewed-by: Fam Zheng
Re: [Qemu-devel] Question about drive mirror
On Mon, 01/13 23:44, rudy...@163.com wrote: > Hi,everyone. > I tested the capability of drive mirror, I found the IO is low. Then I read > the code, > The code mirror_run() will call mirror_iteration() to read the size of buffer > data > from source storage, when the read callback ,and then in mirror_read_complete > () > write the data to the target storage, It is serial. > Now, I hope when it is writing the data to target storage ,we can send the > request > of reading data from source storage. Because of using coroutine to do it > ,there is > some troubles to achieve it. why not use Multi-thread? > Some one can give me some idea? Hi, QEMU block layer has been using coroutine as the program model even before introducing these block jobs, so it was nature that block mirror followed this style. With coroutines, I believe it is as possible as with multi-thread, to have more outstanding IO requests, in order to speed up the mirroring. What's the trouble you have? Multi-threading needs much more synchronization mechanism than what we have in block interface now. One day it may be possible to move block job to a thread, but it will require quite some work. Fam
Re: [Qemu-devel] [PATCH v15 6/9] module: implement module loading
On Mon, 01/13 14:15, Richard Henderson wrote: > On 01/13/2014 08:59 AM, Paolo Bonzini wrote: > > +echo "CONFIG_STAMP=`date +%s`_$$_$RANDOM" >> $config_host_mak > > I really really don't like random numbers that make for non-repeatable builds. > It's a quality-assurance nightmare. Can you elaborate this, please? > > If you want a non-random number unique to the build, try > > git log -n1 --format=format:%H > > and perhaps a file containing that hash created by scripts/make-release. What if the source code is not in a git tree, for example a tarball? Thanks, Fam
Re: [Qemu-devel] [PATCH v15 6/9] module: implement module loading
On Mon, 01/13 22:09, Peter Maydell wrote: > On 13 January 2014 16:59, Paolo Bonzini wrote: > > From: Fam Zheng > > > > This patch adds loading, stamp checking and initialization of modules. > > > > The init function of dynamic module is no longer directly called as > > __attribute__((constructor)) in static linked version, it is called > > only after passed the checking of presense of stamp symbol: > > > > qemu_stamp_$(date +%s$$$RANDOM) > > > > With this, modules built from a different tree/version/configure will > > not be loaded. > > > > The module loading code requires gmodule-2.0. > > > > Signed-off-by: Fam Zheng > > Signed-off-by: Paolo Bonzini > > --- > > Makefile |3 + > > configure | 31 +- > > include/qemu/module.h | 12 + > > rules.mak |7 ++- > > scripts/create_config | 14 ++ > > util/module.c | 107 > > - > > 6 files changed, 158 insertions(+), 16 deletions(-) > > > > diff --git a/Makefile b/Makefile > > index 9de66cb..670ce44 100644 > > --- a/Makefile > > +++ b/Makefile > > @@ -203,6 +203,9 @@ Makefile: $(version-obj-y) $(version-lobj-y) > > libqemustub.a: $(stub-obj-y) > > libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o > > > > +block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) > > NULL > > +util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)' > > + > > ## > > > > qemu-img.o: qemu-img-cmds.h > > diff --git a/configure b/configure > > index 6b46c66..c382044 100755 > > --- a/configure > > +++ b/configure > > @@ -677,7 +677,8 @@ for opt do > >;; > >--disable-debug-info) > >;; > > - --enable-modules) modules="yes" > > + --enable-modules) > > + modules="yes" > >;; > >--cpu=*) > >;; > > @@ -1130,7 +1131,7 @@ Advanced options (experts only): > >--libdir=PATHinstall libraries in PATH > >--sysconfdir=PATHinstall config in PATH$confsuffix > >--localstatedir=PATH install local state in PATH (set at runtime on > > win32) > > - --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and > > sysconfdir [$confsuffix] > > + --with-confsuffix=SUFFIX suffix for QEMU data inside > > datadir/libdir/sysconfdir [$confsuffix] > >--enable-modules enable modules support > >--enable-debug-tcg enable TCG debugging > >--disable-debug-tcg disable TCG debugging (default) > > @@ -2346,15 +2347,19 @@ if test "$mingw32" = yes; then > > else > > glib_req_ver=2.12 > > fi > > -if $pkg_config --atleast-version=$glib_req_ver gthread-2.0; then > > -glib_cflags=`$pkg_config --cflags gthread-2.0` > > -glib_libs=`$pkg_config --libs gthread-2.0` > > -CFLAGS="$glib_cflags $CFLAGS" > > -LIBS="$glib_libs $LIBS" > > -libs_qga="$glib_libs $libs_qga" > > -else > > -error_exit "glib-$glib_req_ver required to compile QEMU" > > -fi > > + > > +for i in gthread-2.0 gmodule-2.0; do > > +if $pkg_config --atleast-version=$glib_req_ver $i; then > > +glib_cflags=`$pkg_config --cflags $i` > > +glib_libs=`$pkg_config --libs $i` > > +CFLAGS="$glib_cflags $CFLAGS" > > +LIBS="$glib_libs $LIBS" > > +libs_qga="$glib_libs $libs_qga" > > +else > > +error_exit "glib-$glib_req_ver required to compile QEMU" > > +fi > > +done > > Also, isn't this going to give rather unhelpful error messages if > the system has glib and gthread but not gmodule? (Or is it > guaranteed that they come as a set? If so then we can either > not bother checking or print a "your system's glib seems to be > busted" message...) > I think they all usually come with one glib package. But it doean't hurt to check, it's easy enough to include $i in the error message. Fam
Re: [Qemu-devel] [PATCH v15 6/9] module: implement module loading
On Mon, 01/13 22:05, Peter Maydell wrote: > On 13 January 2014 16:59, Paolo Bonzini wrote: > > From: Fam Zheng > > > > This patch adds loading, stamp checking and initialization of modules. > > > > The init function of dynamic module is no longer directly called as > > __attribute__((constructor)) in static linked version, it is called > > only after passed the checking of presense of stamp symbol: > > > > qemu_stamp_$(date +%s$$$RANDOM) > > > > > +echo "CONFIG_STAMP=`date +%s`_$$_$RANDOM" >> $config_host_mak > > This is not really a good idea because $RANDOM is a bashism > and our configure script is a generic POSIX shell script. In > particular if you configure on Ubuntu you're likely to find that /bin/sh > is dash and $RANDOM silently expands to the empty string. > > (One day I will clean up the tempfile generation stuff to fix > its misuse of $RANDOM...) > So what's the best way to generate random number here? > > +void DSO_STAMP_FUN(void); > > +/* For error message, this function is an identification of qemu module */ > > What is this comment trying to say? Existence of this symbol indicates it *is* a qemu module. Where we continue to check for existence of DSO_STAMP_FUN, and report it's only the wrong version. Thanks, Fam
Re: [Qemu-devel] [PATCH v15 0/9] Shared library module support
On Mon, 01/13 22:01, Peter Maydell wrote: > On 13 January 2014 16:59, Paolo Bonzini wrote: > > This is based on Fam's patches from October. Very few changes > > apart from rebasing: > > > > * I split his patch 8 in two parts. There is benefit in > > using per-object cflags and libs even before the module-loading > > machinery gets in. > > > > * I added a new patch "darwin: do not use -mdynamic-no-pic". > > CCing Alex Graf for it. > > > > * applied the small change I had requested a small change in patch 2 > > > > I'm not sending a pull request yet because of these two changes, > > but I'll be sending one in a few days. > > This doesn't build on MacOSX if you try to enable modules with > --enable-modules: > module-common.c is missing in patch 6/9, so it failed. Fam
Re: [Qemu-devel] [PATCH 2/2] block: resize backing image during active layer commit, if needed
On Mon, 01/13 15:18, Jeff Cody wrote: > If the top image to commit is the active layer, and also larger than > the base image, then an I/O error will likely be returned during > block-commit. > > For instance, if we have a base image with a virtual size 10G, and a > active layer image of size 20G, then committing the snapshot via > 'block-commit' will likely fail. > > This will automatically attempt to resize the base image, if the > active layer image to be committed is larger. > > Signed-off-by: Jeff Cody > --- > block/mirror.c | 13 + > 1 file changed, 13 insertions(+) > > diff --git a/block/mirror.c b/block/mirror.c > index 2932bab..c4e42fa 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -630,9 +630,22 @@ void commit_active_start(BlockDriverState *bs, > BlockDriverState *base, > BlockDriverCompletionFunc *cb, > void *opaque, Error **errp) > { > +int64_t length; > if (bdrv_reopen(base, bs->open_flags, errp)) { > return; > } "base" is already reopened here. > + > +length = bdrv_getlength(bs); > + > +if (length > bdrv_getlength(base)) { > +if (bdrv_truncate(base, length) < 0) { > +error_setg(errp, "Top image %s is larger than base image %s, and > " > + "resize of base image failed.", > + bs->filename, base->filename); > +return; Should we restore open flags for base? Thanks, Fam > +} > +} > + > bdrv_ref(base); > mirror_start_job(bs, base, speed, 0, 0, > on_error, on_error, cb, opaque, errp, > -- > 1.8.3.1 >
Re: [Qemu-devel] [PATCHv6 6/6] qemu-iotests: blacklist test 020 for NFS protocol
On Mon, 01/13 11:21, Peter Lieven wrote: > reopening is currently not supported. > > Signed-off-by: Peter Lieven > --- > tests/qemu-iotests/020 |5 + > 1 file changed, 5 insertions(+) > > diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 > index a42f32f..f8a849c 100755 > --- a/tests/qemu-iotests/020 > +++ b/tests/qemu-iotests/020 > @@ -46,6 +46,11 @@ _supported_fmt qcow qcow2 vmdk qed > _supported_proto file > _supported_os Linux > > +# NFS does not support bdrv_reopen_prepare thus qemu-img commit fails. > +if [ "$IMGPROTO" = "nfs" ]; then > +_notrun "image protocol $IMGPROTO does not support bdrv_commit" > +fi > + Doesn't "_supported_proto file" above already skip this case? Fam
Re: [Qemu-devel] [PATCHv6 5/6] qemu-iotests: fix expected output of test 067
On Mon, 01/13 11:21, Peter Lieven wrote: > Signed-off-by: Peter Lieven > --- > tests/qemu-iotests/067.out |8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out > index 8d271cc..79ed90f 100644 > --- a/tests/qemu-iotests/067.out > +++ b/tests/qemu-iotests/067.out > @@ -12,7 +12,7 @@ QMP_VERSION > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"path": > "/machine/peripheral/virtio0/virtio-backend"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"device": "virtio0", "path": > "/machine/peripheral/virtio0"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "RESET"} > -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, > "removable": true, "tray_open": false, "type": "unknown"}, {"device": > "floppy0", "locked": false, "removable": true, "tray_open": false, "type": > "unknown"}, {"device": "sd0", "locked": false, "removable": true, > "tray_open": false, "type": "unknown"}]} > +{"return": [{"io-status": "ok", "device": "disk", "locked": false, > "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": > 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": > "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": > {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": > 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": > 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", > "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", > "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, > "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, > "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, > "removable": true, "tray_open": false, "type": "unknown"}]} > {"return": {}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "SHUTDOWN"} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} > @@ -31,7 +31,7 @@ QMP_VERSION > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"path": > "/machine/peripheral/virtio0/virtio-backend"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"device": "virtio0", "path": > "/machine/peripheral/virtio0"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "RESET"} > -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, > "removable": true, "tray_open": false, "type": "unknown"}, {"device": > "floppy0", "locked": false, "removable": true, "tray_open": false, "type": > "unknown"}, {"device": "sd0", "locked": false, "removable": true, > "tray_open": false, "type": "unknown"}]} > +{"return": [{"io-status": "ok", "device": "disk", "locked": false, > "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": > 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": > "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": > {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": > 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": > 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", > "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", > "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, > "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, > "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, > "removable": true, "tray_open": false, "type": "unknown"}]} > {"return": {}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "SHUTDOWN"} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} > @@ -51,7 +51,7 @@ QMP_VERSION > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"path": > "/machine/peripheral/virtio0/virtio-backend"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "DEVICE_DELETED", "data": {"device": "virtio0", "path": > "/machine/peripheral/virtio0"}} > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": > "RESET"} > -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, > "removable": true, "tray_open": false, "type": "unknown"}, {"device": > "floppy0", "locked": false, "removable": true, "tray_open
Re: [Qemu-devel] [PATCHv6 5/6] qemu-iotests: fix expected output of test 067
On Wed, 01/15 08:01, Peter Lieven wrote: > On 15.01.2014 07:54, Fam Zheng wrote: > >On Mon, 01/13 11:21, Peter Lieven wrote: > >>Signed-off-by: Peter Lieven > >>--- > >> tests/qemu-iotests/067.out |8 > >> 1 file changed, 4 insertions(+), 4 deletions(-) > >> > >>diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out > >>index 8d271cc..79ed90f 100644 > >>--- a/tests/qemu-iotests/067.out > >>+++ b/tests/qemu-iotests/067.out > >>@@ -12,7 +12,7 @@ QMP_VERSION > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "DEVICE_DELETED", "data": {"path": > >> "/machine/peripheral/virtio0/virtio-backend"}} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": > >> "/machine/peripheral/virtio0"}} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "RESET"} > >>-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, > >>"removable": true, "tray_open": false, "type": "unknown"}, {"device": > >>"floppy0", "locked": false, "removable": true, "tray_open": false, "type": > >>"unknown"}, {"device": "sd0", "locked": false, "removable": true, > >>"tray_open": false, "type": "unknown"}]} > >>+{"return": [{"io-status": "ok", "device": "disk", "locked": false, > >>"removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": > >>134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": > >>"qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": > >>{"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, > >>"iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": > >>0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": > >>"TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, > >>{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": > >>true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", > >>"locked": false, "removable": true, "tray_open": false, "type": "unknown"}, > >>{"device": "sd0", "locked": false, "removable": true, "tray_open": false, > >>"type": "unknown"}]} > >> {"return": {}} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "SHUTDOWN"} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": > >> true}} > >>@@ -31,7 +31,7 @@ QMP_VERSION > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "DEVICE_DELETED", "data": {"path": > >> "/machine/peripheral/virtio0/virtio-backend"}} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": > >> "/machine/peripheral/virtio0"}} > >> {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, > >> "event": "RESET"} > >>-{"retur
Re: [Qemu-devel] [PATCH v15 6/9] module: implement module loading
On Tue, 01/14 15:45, Daniel P. Berrange wrote: > On Tue, Jan 14, 2014 at 04:19:41PM +0100, Paolo Bonzini wrote: > > Il 14/01/2014 15:47, Richard Henderson ha scritto: > > +echo "CONFIG_STAMP=`date +%s`_$$_$RANDOM" >> $config_host_mak > > >>> >> > > >>> >> I really really don't like random numbers that make for > > >>> >> non-repeatable builds. > > >>> >> It's a quality-assurance nightmare. > > >> > > > >> > Can you elaborate this, please? > > > Build systems like we use at Red Hat want to be able to produce > > > bit-for-bit > > > identical binaries when given the exact same input. Using random numbers > > > during the build process prevents that. > > > > I totally agree, but AIUI people wanted the symbol to be something that > > you couldn't know in advance (e.g. when compiling an out-of-tree > > module). For some definition of "couldn't" and "in advance". > > You can't stop a determined person. The goal is really just to make sure > they have to jump through painful hoops if they're going to delibrately > ignore our policy that this is not for 3rd party out of tree modules to > use. > > When doing RHEL / Fedora builds, we *do* want this to change each time > the RPM is rebuilt for a new release. eg any time we add a new patch > to the RPM we want it to change, but if you're just rebuilding an > src.rpm without making changes we don't need it to be different. > > You could use a sha256 sum of 'configure content + version + pkgversion' > to get something that'd change each time distros did a formal new build, > but would still allow reproducible builds. This sounds like a nice solution, I'll adopt. Thanks. Fam
[Qemu-devel] [PATCH v16 1/9] rules.mak: fix $(obj) to a real relative path
Makefile.target includes rule.mak and unnested common-obj-y, then prefix them with '../', this will ignore object specific QEMU_CFLAGS in subdir Makefile.objs: $(obj)/curl.o: QEMU_CFLAGS += $(CURL_CFLAGS) Because $(obj) here is './block', instead of '../block'. This doesn't hurt compiling because we basically build all .o from top Makefile, before entering Makefile.target, but it will affact arriving per-object libs support. The starting point of $(obj) is passed in as argument of unnest-vars, as well as nested variables, so that different Makefiles can pass in a right value. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile| 14 ++ Makefile.objs | 17 + Makefile.target | 17 + configure | 1 + rules.mak | 14 +- 5 files changed, 38 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index bdff4e4..423ace5 100644 --- a/Makefile +++ b/Makefile @@ -122,6 +122,16 @@ defconfig: ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/Makefile.objs +endif + +dummy := $(call unnest-vars,, \ +stub-obj-y \ +util-obj-y \ +qga-obj-y \ +block-obj-y \ +common-obj-y) + +ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/tests/Makefile endif ifeq ($(CONFIG_SMARTCARD_NSS),y) @@ -130,6 +140,10 @@ endif all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all +vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) + +vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS) + config-host.h: config-host.h-timestamp config-host.h-timestamp: config-host.mak qemu-options.def: $(SRC_PATH)/qemu-options.hx diff --git a/Makefile.objs b/Makefile.objs index 857bb53..489c4b1 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -41,7 +41,7 @@ libcacard-y += libcacard/vcardt.o # single QEMU executable should support all CPUs and machines. ifeq ($(CONFIG_SOFTMMU),y) -common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/ +common-obj-y = blockdev.o blockdev-nbd.o block/ common-obj-y += net/ common-obj-y += readline.o common-obj-y += qdev-monitor.o device-hotplug.o @@ -112,18 +112,3 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo # by libqemuutil.a. These should be moved to a separate .json schema. qga-obj-y = qga/ qapi-types.o qapi-visit.o qga-vss-dll-obj-y = qga/ - -vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) - -vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS) - -QEMU_CFLAGS+=$(GLIB_CFLAGS) - -nested-vars += \ - stub-obj-y \ - util-obj-y \ - qga-obj-y \ - qga-vss-dll-obj-y \ - block-obj-y \ - common-obj-y -dummy := $(call unnest-vars) diff --git a/Makefile.target b/Makefile.target index af6ac7e..9a6e7dd 100644 --- a/Makefile.target +++ b/Makefile.target @@ -139,13 +139,22 @@ endif # CONFIG_SOFTMMU # Workaround for http://gcc.gnu.org/PR55489, see configure. %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS) -nested-vars += obj-y +dummy := $(call unnest-vars,,obj-y) -# This resolves all nested paths, so it must come last +# we are making another call to unnest-vars with different vars, protect obj-y, +# it can be overriden in subdir Makefile.objs +obj-y-save := $(obj-y) + +block-obj-y := +common-obj-y := include $(SRC_PATH)/Makefile.objs +dummy := $(call unnest-vars,..,block-obj-y common-obj-y) + +# Now restore obj-y +obj-y := $(obj-y-save) -all-obj-y = $(obj-y) -all-obj-y += $(addprefix ../, $(common-obj-y)) +all-obj-y = $(obj-y) $(common-obj-y) +all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) ifndef CONFIG_HAIKU LIBS+=-lm diff --git a/configure b/configure index 3782a6a..4990648 100755 --- a/configure +++ b/configure @@ -2345,6 +2345,7 @@ fi if $pkg_config --atleast-version=$glib_req_ver gthread-2.0; then glib_cflags=`$pkg_config --cflags gthread-2.0` glib_libs=`$pkg_config --libs gthread-2.0` +CFLAGS="$glib_cflags $CFLAGS" LIBS="$glib_libs $LIBS" libs_qga="$glib_libs $libs_qga" else diff --git a/rules.mak b/rules.mak index 49edb9b..7d27602 100644 --- a/rules.mak +++ b/rules.mak @@ -138,9 +138,6 @@ clean: clean-timestamp # magic to descend into other directories -obj := . -old-nested-dirs := - define push-var $(eval save-$2-$1 = $(value $1)) $(eval $1 :=) @@ -154,9 +151,11 @@ endef define unnest-dir $(foreach var,$(nested-vars),$(call push-var,$(var),$1/)) -$(eval obj := $(obj)/$1) +$(eval obj-parent-$1 := $(obj)) +$(eval obj := $(if $(obj),$(obj)/$1,$1)) $(eval include $(SRC_PATH)/$1/Makefile.objs) -$(eval obj := $(patsubst %/$1,%,$(obj))) +$(eval obj := $(obj-parent-$1)) +$(eval obj-parent-$1 := ) $(foreach var,$(nested-vars),$(call pop-var,$(var),$1/)) endef @@ -171,7 +170,12 @@ $(if $(nested-dirs), endef define unnest-vars +$(eval obj := $1) +$(eval nested-vars := $2) +$(eval old-nested-dirs := ) $(call unnest-vars-1) +$(if $1,$(foreach v,$(nested-vars),$(eval \ + $v := $(addprefix $1/,$($v) $(for
[Qemu-devel] [PATCH v16 4/9] darwin: do not use -mdynamic-no-pic
From: Paolo Bonzini While -mdynamic-no-pic can speed up the code somewhat, it is only used on the legacy PowerPC Mac OS X, and I am not sure if anyone is still testing that. Disabling PIC can cause problems when enabling modules, so do not do that. Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- configure | 2 -- 1 file changed, 2 deletions(-) diff --git a/configure b/configure index 9d71867..99434e6 100755 --- a/configure +++ b/configure @@ -516,8 +516,6 @@ Darwin) if [ "$cpu" = "x86_64" ] ; then QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" LDFLAGS="-arch x86_64 $LDFLAGS" - else -QEMU_CFLAGS="-mdynamic-no-pic $QEMU_CFLAGS" fi cocoa="yes" audio_drv_list="coreaudio" -- 1.8.5.2
[Qemu-devel] [PATCH v16 0/9] Shared library module support
A few changes on Paolo's v15, to fix MacOSX build (in fact fix Linux as well) and get rid of $RANDOM: [05/09] build-sys: introduce common-obj-m and block-obj-m for DSO Add " -undefined dynamic_lookup" to Darwin LDFLAGS. Otherwise the linker complains about undefined symbols. [06/09] module: implement module loading Don't use $RANDOM for stamp symbol generation. Hash version, pkgversion and configure content instead. Add back module-common.c from previous revisions. Reformat module_init macro definition to keep scripts/checkpatch.pl happy. Improve error message when gmodule not present. Fam Zheng (8): rules.mak: fix $(obj) to a real relative path rules.mak: allow per object cflags and libs block: use per-object cflags and libs build-sys: introduce common-obj-m and block-obj-m for DSO module: implement module loading Makefile: install modules with "make install" .gitignore: ignore module related files (dll, so, mo) block: convert block drivers linked with libs to modules Paolo Bonzini (1): darwin: do not use -mdynamic-no-pic .gitignore| 3 ++ Makefile | 30 +- Makefile.objs | 19 ++--- Makefile.target | 21 -- block/Makefile.objs | 13 +- configure | 79 ++--- include/qemu/module.h | 18 - module-common.c | 10 + rules.mak | 80 +++-- scripts/create_config | 14 +++ util/module.c | 107 +- 11 files changed, 325 insertions(+), 69 deletions(-) create mode 100644 module-common.c -- 1.8.5.2
[Qemu-devel] [PATCH v16 8/9] .gitignore: ignore module related files (dll, so, mo)
Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1c9d63d..7702b0c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,9 @@ fsdev/virtfs-proxy-helper.pod *.cp *.dvi *.exe +*.dll +*.so +*.mo *.fn *.ky *.log -- 1.8.5.2
[Qemu-devel] [PATCH v16 2/9] rules.mak: allow per object cflags and libs
Adds extract-libs in LINK to expand any "per object libs", the syntax to define such a libs options is like: foo.o-libs := $(CURL_LIBS) in block/Makefile.objs. Similarly, foo.o-cflags := $(FOO_CFLAGS) is also supported. "foo.o" must be listed in a nested var (e.g. common-obj-y) to make the option variables effective. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- rules.mak | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/rules.mak b/rules.mak index 7d27602..9398268 100644 --- a/rules.mak +++ b/rules.mak @@ -21,15 +21,17 @@ QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(*D)/$(*F).d # Same as -I$(SRC_PATH) -I., but for the nested source/object directories QEMU_INCLUDES += -I$(
[Qemu-devel] [PATCH v16 7/9] Makefile: install modules with "make install"
Install all the modules to ${MODDIR}. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile | 6 ++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 670ce44..a91f119 100644 --- a/Makefile +++ b/Makefile @@ -371,6 +371,12 @@ install-datadir install-localstatedir ifneq ($(TOOLS),) $(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)" endif +ifneq ($(CONFIG_MODULES),) + $(INSTALL_DIR) "$(DESTDIR)$(moddir)" + for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \ + $(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(moddir)/$${s//\//-}"; \ + done +endif ifneq ($(HELPERS-y),) $(INSTALL_DIR) "$(DESTDIR)$(libexecdir)" $(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)" -- 1.8.5.2
[Qemu-devel] [PATCH v16 9/9] block: convert block drivers linked with libs to modules
The converted block drivers are: curl iscsi rbd ssh glusterfs Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- configure | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 77f09ba..80093ea 100755 --- a/configure +++ b/configure @@ -4071,7 +4071,7 @@ if test "$bswap_h" = "yes" ; then echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak fi if test "$curl" = "yes" ; then - echo "CONFIG_CURL=y" >> $config_host_mak + echo "CONFIG_CURL=m" >> $config_host_mak echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak echo "CURL_LIBS=$curl_libs" >> $config_host_mak fi @@ -4162,7 +4162,7 @@ if test "$glx" = "yes" ; then fi if test "$libiscsi" = "yes" ; then - echo "CONFIG_LIBISCSI=y" >> $config_host_mak + echo "CONFIG_LIBISCSI=m" >> $config_host_mak if test "$libiscsi_version" = "1.4.0"; then echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak fi @@ -4188,7 +4188,7 @@ if test "$qom_cast_debug" = "yes" ; then echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak fi if test "$rbd" = "yes" ; then - echo "CONFIG_RBD=y" >> $config_host_mak + echo "CONFIG_RBD=m" >> $config_host_mak echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak echo "RBD_LIBS=$rbd_libs" >> $config_host_mak fi @@ -4233,7 +4233,7 @@ if test "$getauxval" = "yes" ; then fi if test "$glusterfs" = "yes" ; then - echo "CONFIG_GLUSTERFS=y" >> $config_host_mak + echo "CONFIG_GLUSTERFS=m" >> $config_host_mak echo "GLUSTERFS_CFLAGS=$glusterfs_cflags" >> $config_host_mak echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak fi @@ -4243,7 +4243,7 @@ if test "$glusterfs_discard" = "yes" ; then fi if test "$libssh2" = "yes" ; then - echo "CONFIG_LIBSSH2=y" >> $config_host_mak + echo "CONFIG_LIBSSH2=m" >> $config_host_mak echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak fi -- 1.8.5.2
[Qemu-devel] [PATCH v16 3/9] block: use per-object cflags and libs
No longer adds flags and libs for them to global variables, instead create config-host.mak variables like FOO_CFLAGS and FOO_LIBS, which is used as per object cflags and libs. This removes unwanted dependencies from libcacard. Signed-off-by: Fam Zheng [Split from Fam's patch to enable modules. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- block/Makefile.objs | 13 - configure | 25 ++--- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/block/Makefile.objs b/block/Makefile.objs index 4e8c91e..a1db63f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -23,4 +23,15 @@ common-obj-y += commit.o common-obj-y += mirror.o common-obj-y += backup.o -$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS) +iscsi.o-cflags := $(LIBISCSI_CFLAGS) +iscsi.o-libs := $(LIBISCSI_LIBS) +curl.o-cflags := $(CURL_CFLAGS) +curl.o-libs:= $(CURL_LIBS) +rbd.o-cflags := $(RBD_CFLAGS) +rbd.o-libs := $(RBD_LIBS) +gluster.o-cflags := $(GLUSTERFS_CFLAGS) +gluster.o-libs := $(GLUSTERFS_LIBS) +ssh.o-cflags := $(LIBSSH2_CFLAGS) +ssh.o-libs := $(LIBSSH2_LIBS) +qcow.o-libs:= -lz +linux-aio.o-libs := -laio diff --git a/configure b/configure index 4990648..9d71867 100755 --- a/configure +++ b/configure @@ -2303,8 +2303,6 @@ EOF curl_libs=`$curlconfig --libs 2>/dev/null` if compile_prog "$curl_cflags" "$curl_libs" ; then curl=yes -libs_tools="$curl_libs $libs_tools" -libs_softmmu="$curl_libs $libs_softmmu" else if test "$curl" = "yes" ; then feature_not_found "curl" @@ -2460,8 +2458,6 @@ EOF rbd_libs="-lrbd -lrados" if compile_prog "" "$rbd_libs" ; then rbd=yes -libs_tools="$rbd_libs $libs_tools" -libs_softmmu="$rbd_libs $libs_softmmu" else if test "$rbd" = "yes" ; then feature_not_found "rados block device" @@ -2478,9 +2474,6 @@ if test "$libssh2" != "no" ; then libssh2_cflags=`$pkg_config libssh2 --cflags` libssh2_libs=`$pkg_config libssh2 --libs` libssh2=yes -libs_tools="$libssh2_libs $libs_tools" -libs_softmmu="$libssh2_libs $libs_softmmu" -QEMU_CFLAGS="$QEMU_CFLAGS $libssh2_cflags" else if test "$libssh2" = "yes" ; then error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2" @@ -2526,8 +2519,6 @@ int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); retu EOF if compile_prog "" "-laio" ; then linux_aio=yes -libs_softmmu="$libs_softmmu -laio" -libs_tools="$libs_tools -laio" else if test "$linux_aio" = "yes" ; then feature_not_found "linux AIO" @@ -2696,9 +2687,6 @@ if test "$glusterfs" != "no" ; then glusterfs="yes" glusterfs_cflags=`$pkg_config --cflags glusterfs-api` glusterfs_libs=`$pkg_config --libs glusterfs-api` -CFLAGS="$CFLAGS $glusterfs_cflags" -libs_tools="$glusterfs_libs $libs_tools" -libs_softmmu="$glusterfs_libs $libs_softmmu" if $pkg_config --atleast-version=5 glusterfs-api; then glusterfs_discard="yes" fi @@ -3066,11 +3054,9 @@ EOF libiscsi="yes" libiscsi_cflags=$($pkg_config --cflags libiscsi) libiscsi_libs=$($pkg_config --libs libiscsi) -CFLAGS="$CFLAGS $libiscsi_cflags" -LIBS="$LIBS $libiscsi_libs" elif compile_prog "" "-liscsi" ; then libiscsi="yes" -LIBS="$LIBS -liscsi" +libiscsi_libs="-liscsi" else if test "$libiscsi" = "yes" ; then feature_not_found "libiscsi" @@ -4068,6 +4054,7 @@ fi if test "$curl" = "yes" ; then echo "CONFIG_CURL=y" >> $config_host_mak echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak + echo "CURL_LIBS=$curl_libs" >> $config_host_mak fi if test "$brlapi" = "yes" ; then echo "CONFIG_BRLAPI=y" >> $config_host_mak @@ -4160,6 +4147,8 @@ if test "$libiscsi" = "yes" ; then if test "$libiscsi_version" = "1.4.0"; then echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak fi + echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak + echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak fi if test "$seccomp" = "yes"; then @@ -4181,6 +4170,8 @@ if test "$qom_cast_debug" = "yes" ; then fi if test "$rbd" = "yes" ; the
[Qemu-devel] [PATCH v16 6/9] module: implement module loading
This patch adds loading, stamp checking and initialization of modules. The init function of dynamic module is no longer directly called as __attribute__((constructor)) in static linked version, it is called only after passed the checking of presense of stamp symbol: qemu_stamp_$RELEASEHASH where $RELEASEHASH is generated by hashing version strings and content of configure script. With this, modules built from a different tree/version/configure will not be loaded. The module loading code requires gmodule-2.0. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile | 3 ++ configure | 37 ++--- include/qemu/module.h | 18 - module-common.c | 10 + rules.mak | 7 ++-- scripts/create_config | 14 +++ util/module.c | 107 +- 7 files changed, 176 insertions(+), 20 deletions(-) create mode 100644 module-common.c diff --git a/Makefile b/Makefile index 9de66cb..670ce44 100644 --- a/Makefile +++ b/Makefile @@ -203,6 +203,9 @@ Makefile: $(version-obj-y) $(version-lobj-y) libqemustub.a: $(stub-obj-y) libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o +block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL +util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)' + ## qemu-img.o: qemu-img-cmds.h diff --git a/configure b/configure index a6a626f..77f09ba 100755 --- a/configure +++ b/configure @@ -677,7 +677,8 @@ for opt do ;; --disable-debug-info) ;; - --enable-modules) modules="yes" + --enable-modules) + modules="yes" ;; --cpu=*) ;; @@ -1130,7 +1131,7 @@ Advanced options (experts only): --libdir=PATHinstall libraries in PATH --sysconfdir=PATHinstall config in PATH$confsuffix --localstatedir=PATH install local state in PATH (set at runtime on win32) - --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix] + --with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix] --enable-modules enable modules support --enable-debug-tcg enable TCG debugging --disable-debug-tcg disable TCG debugging (default) @@ -2346,15 +2347,19 @@ if test "$mingw32" = yes; then else glib_req_ver=2.12 fi -if $pkg_config --atleast-version=$glib_req_ver gthread-2.0; then -glib_cflags=`$pkg_config --cflags gthread-2.0` -glib_libs=`$pkg_config --libs gthread-2.0` -CFLAGS="$glib_cflags $CFLAGS" -LIBS="$glib_libs $LIBS" -libs_qga="$glib_libs $libs_qga" -else -error_exit "glib-$glib_req_ver required to compile QEMU" -fi + +for i in gthread-2.0 gmodule-2.0; do +if $pkg_config --atleast-version=$glib_req_ver $i; then +glib_cflags=`$pkg_config --cflags $i` +glib_libs=`$pkg_config --libs $i` +CFLAGS="$glib_cflags $CFLAGS" +LIBS="$glib_libs $LIBS" +libs_qga="$glib_libs $libs_qga" +else +error_exit "glib-$glib_req_ver $i is required to compile QEMU" +fi +done + ## # pixman support probe @@ -3628,6 +3633,7 @@ if test "$mingw32" = "yes" ; then fi qemu_confdir=$sysconfdir$confsuffix +moddir=$libdir$confsuffix qemu_datadir=$datadir$confsuffix qemu_localedir="$datadir/locale" @@ -3718,6 +3724,7 @@ echo "Install prefix$prefix" echo "BIOS directory`eval echo $qemu_datadir`" echo "binary directory `eval echo $bindir`" echo "library directory `eval echo $libdir`" +echo "module directory `eval echo $moddir`" echo "libexec directory `eval echo $libexecdir`" echo "include directory `eval echo $includedir`" echo "config directory `eval echo $sysconfdir`" @@ -3849,6 +3856,7 @@ echo all: >> $config_host_mak echo "prefix=$prefix" >> $config_host_mak echo "bindir=$bindir" >> $config_host_mak echo "libdir=$libdir" >> $config_host_mak +echo "moddir=$moddir" >> $config_host_mak echo "libexecdir=$libexecdir" >> $config_host_mak echo "includedir=$includedir" >> $config_host_mak echo "mandir=$mandir" >> $config_host_mak @@ -3867,9 +3875,6 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak echo "ARCH=$ARCH" >> $config_host_mak -if test "$modules" = "yes"; then - echo "CONFIG_MODULES=y" >> $config_host_mak -fi if test "$debug_tcg" = "yes" ; then echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak fi @@ -3991,6 +3996,10 @@ echo "T
[Qemu-devel] [PATCH v16 5/9] build-sys: introduce common-obj-m and block-obj-m for DSO
Add necessary rules and flags for shared object generation. $(common-obj-m) will include $(block-obj-m), like $(common-obj-y) does for $(block-obj-y). The new rules introduced here are: 0) For all %.so compiling: QEMU_CFLAGS += -fPIC 1) %.o in $(common-obj-m) is compiled to %.o, then linked to %.so. 2) %.mo in $(common-obj-m) is the placeholder for %.so for pattern matching in Makefile. It's linked to "-shared" with all its dependencies (multiple *.o) as input. Which means the list of depended objects must be specified in each sub-Makefile.objs: foo.mo-objs := bar.o baz.o qux.o in the same style with foo.o-cflags and foo.o-libs. The objects here will be prefixed with "$(obj)/" if it's a subdirectory Makefile.objs. Also introduce --enable-modules in configure, the option will enable support of shared object build. Otherwise objects are static linked to executables. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile| 9 +++-- Makefile.objs | 2 ++ Makefile.target | 6 +- configure | 14 ++ rules.mak | 54 +- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 423ace5..9de66cb 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,9 @@ dummy := $(call unnest-vars,, \ util-obj-y \ qga-obj-y \ block-obj-y \ -common-obj-y) +block-obj-m \ +common-obj-y \ +common-obj-m) ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/tests/Makefile @@ -138,7 +140,7 @@ ifeq ($(CONFIG_SMARTCARD_NSS),y) include $(SRC_PATH)/libcacard/Makefile endif -all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all +all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) @@ -256,6 +258,9 @@ clean: rm -f qemu-options.def find . -name '*.[oda]' -type f -exec rm -f {} + find . -name '*.l[oa]' -type f -exec rm -f {} + + find . -name '*.so' -type f -exec rm -f {} + + find . -name '*.mo' -type f -exec rm -f {} + + find . -name '*.dll' -type f -exec rm -f {} + rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~ rm -f fsdev/*.pod rm -rf .libs */.libs diff --git a/Makefile.objs b/Makefile.objs index 489c4b1..56f256f 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -19,6 +19,8 @@ block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o block-obj-y += qemu-coroutine-sleep.o block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o +block-obj-m = block/ + ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy) # Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add. # only pull in the actual virtio-9p device if we also enabled virtio. diff --git a/Makefile.target b/Makefile.target index 9a6e7dd..3945260 100644 --- a/Makefile.target +++ b/Makefile.target @@ -148,7 +148,11 @@ obj-y-save := $(obj-y) block-obj-y := common-obj-y := include $(SRC_PATH)/Makefile.objs -dummy := $(call unnest-vars,..,block-obj-y common-obj-y) +dummy := $(call unnest-vars,.., \ + block-obj-y \ + block-obj-m \ + common-obj-y \ + common-obj-m) # Now restore obj-y obj-y := $(obj-y-save) diff --git a/configure b/configure index 99434e6..a6a626f 100755 --- a/configure +++ b/configure @@ -205,6 +205,9 @@ mingw32="no" gcov="no" gcov_tool="gcov" EXESUF="" +DSOSUF=".so" +LDFLAGS_SHARED="-shared" +modules="no" prefix="/usr/local" mandir="\${prefix}/share/man" datadir="\${prefix}/share" @@ -513,6 +516,7 @@ OpenBSD) Darwin) bsd="yes" darwin="yes" + LDFLAGS_SHARED="-bundle -undefined dynamic_lookup" if [ "$cpu" = "x86_64" ] ; then QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" LDFLAGS="-arch x86_64 $LDFLAGS" @@ -606,6 +610,7 @@ fi if test "$mingw32" = "yes" ; then EXESUF=".exe" + DSOSUF=".dll" QEMU_CFLAGS="-DWIN32_LEAN_AND_MEAN -DWINVER=0x501 $QEMU_CFLAGS" # enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later) QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS" @@ -672,6 +677,8 @@ for opt do ;; --disable-debug-info) ;; + --enable-modules) modules="yes" + ;; --cpu=*) ;; --target-list=*) target_list="$optarg" @@ -1124,6 +1131,7 @@ Advanced options (experts only): --sysconfdir=PATHinstall config in PATH$confsuffix --localstatedir=PATH install local state in PATH (set at runtime on win32) --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir
Re: [Qemu-devel] [PATCH v16 6/9] module: implement module loading
On Wed, 01/15 11:53, Peter Maydell wrote: > On 15 January 2014 08:48, Fam Zheng wrote: > > This patch adds loading, stamp checking and initialization of modules. > > > +echo "CONFIG_STAMP=`(echo $qemu_version; echo $pkgversion; cat $0) | > > sha256sum - | cut -f1 -d\ `" >> $config_host_mak > > This fails to configure under MacOSX, I'm afraid -- there is > no sha256sum there. Can we assume shasum [1] always present on MaxOSX? (Seems it comes with perl.) [1]: https://developer.apple.com/library/mac/documentation/Darwin/Reference/Manpages/man1/shasum.1.html Fam
Re: [Qemu-devel] [PATCH v16 6/9] module: implement module loading
On Wed, 01/15 13:30, Paolo Bonzini wrote: > Il 15/01/2014 13:18, Alex Bligh ha scritto: > > I phrased that badly. I meant same output for the same calling convention, > > so > > surely just replacing 'sha256sum' with 'shasum -a 256' is going to work > > fine on > > all systems? > > I don't think all systems are guaranteed to have shasum. If we use > SHA-1, and just look for the three binaries sha1sum/sha1/shasum, we > don't have to bother about arguments and only add 10 lines or so to > configure. > Agreed. I'll add a test and probe for any one of shasum, sha256 and sha256sum. Easy enough. Fam
Re: [Qemu-devel] [PATCH v16 0/9] Shared library module support
On Wed, 01/15 14:40, Peter Maydell wrote: > On 15 January 2014 14:36, Paolo Bonzini wrote: > > Il 15/01/2014 15:20, Peter Maydell ha scritto: > >>> > > >>> > On RHEL6 I tried "qemu-system-x86_64 -cdrom > >>> > http://boot.ipxe.org/ipxe.iso"; and it worked, but it failed on Fedora. > >>> > I don't know if it's a QEMU or curl bug. > >> Doesn't work on MacOSX either: it says > >> qemu-system-x86_64: -cdrom http://boot.ipxe.org/ipxe.iso: could not > >> open disk image http://boot.ipxe.org/ipxe.iso: Unknown protocol > > > > Had you installed QEMU before starting it? Fam, I think you need a way > > to override CONFIG_MODDIR (even an environment variable could do). > > No, I pretty much never install QEMU. I always run it out of a build > directory. Module loading should IMHO work with similar semantics > to the data dir we use for BIOS file loading: one of the places checked > is relative to the executable path. > > Maybe we should also have better error messages for "I tried > to load a module and this error is because I couldn't find one", > so the user might be prompted to install the modules package his > distro provides or investigate config if it's self-built, or whatever... > Sounds good to me. Will add a message. Fam
[Qemu-devel] [PATCH v17 03/10] rules.mak: allow per object cflags and libs
Adds extract-libs in LINK to expand any "per object libs", the syntax to define such a libs options is like: foo.o-libs := $(CURL_LIBS) in block/Makefile.objs. Similarly, foo.o-cflags := $(FOO_CFLAGS) is also supported. "foo.o" must be listed in a nested var (e.g. common-obj-y) to make the option variables effective. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- rules.mak | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/rules.mak b/rules.mak index 7d27602..9398268 100644 --- a/rules.mak +++ b/rules.mak @@ -21,15 +21,17 @@ QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(*D)/$(*F).d # Same as -I$(SRC_PATH) -I., but for the nested source/object directories QEMU_INCLUDES += -I$(
[Qemu-devel] [PATCH v17 00/10] Shared library module support
Many thanks for everyones testing and debugging! v17: [01/10] util: Split out qemu_exec_dir from os_find_datadir New. Used in 07 for module searching. [07/10] module: implement module loading Probe for shasum, sha1sum or sha1 in configure. (PMM, Paolo) Search modules in relative paths (./ and ../) of program. (PMM) This makes testing much easier for developers. And end users are safe with the protection of stamp check. Improved error message. Print to stderr when a module is not found. (PMM) Fam Zheng (9): util: Split out qemu_exec_dir from os_find_datadir rules.mak: fix $(obj) to a real relative path rules.mak: allow per object cflags and libs block: use per-object cflags and libs build-sys: introduce common-obj-m and block-obj-m for DSO module: implement module loading Makefile: install modules with "make install" .gitignore: ignore module related files (dll, so, mo) block: convert block drivers linked with libs to modules Paolo Bonzini (1): darwin: do not use -mdynamic-no-pic .gitignore| 3 ++ Makefile | 30 ++- Makefile.objs | 19 ++- Makefile.target | 21 ++-- block/Makefile.objs | 13 - configure | 91 --- include/qemu/module.h | 18 ++- include/qemu/osdep.h | 4 ++ module-common.c | 10 os-posix.c| 40 +++--- os-win32.c| 19 +-- rules.mak | 81 +++- scripts/create_config | 14 + util/module.c | 145 +- util/oslib-posix.c| 45 util/oslib-win32.c| 24 + 16 files changed, 457 insertions(+), 120 deletions(-) create mode 100644 module-common.c -- 1.8.5.3
[Qemu-devel] [PATCH v17 01/10] util: Split out qemu_exec_dir from os_find_datadir
This can be reused by module loading routines. Signed-off-by: Fam Zheng --- include/qemu/osdep.h | 4 os-posix.c | 40 ++-- os-win32.c | 19 +-- util/oslib-posix.c | 45 + util/oslib-win32.c | 24 5 files changed, 80 insertions(+), 52 deletions(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index b3e2b6d..e680684 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -215,6 +215,10 @@ bool fips_get_state(void); */ char *qemu_get_local_state_pathname(const char *relative_pathname); +/* Find program directory with best effort. Try platform API first, then parse + * argv0 if it's not NULL. The returned string needs to be g_free'ed */ +char *qemu_exec_dir(const char *argv0); + /** * qemu_getauxval: * @type: the auxiliary vector key to lookup diff --git a/os-posix.c b/os-posix.c index d39261d..c5e0722 100644 --- a/os-posix.c +++ b/os-posix.c @@ -86,44 +86,15 @@ void os_setup_signal_handling(void) #define BUILD_SUFFIX "/pc-bios" char *os_find_datadir(const char *argv0) { -char *dir; -char *p = NULL; +char *dir, *exec_dir; char *res; -char buf[PATH_MAX]; size_t max_len; -#if defined(__linux__) -{ -int len; -len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); -if (len > 0) { -buf[len] = 0; -p = buf; -} -} -#elif defined(__FreeBSD__) -{ -static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; -size_t len = sizeof(buf) - 1; - -*buf = '\0'; -if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && -*buf) { -buf[sizeof(buf) - 1] = '\0'; -p = buf; -} -} -#endif -/* If we don't have any way of figuring out the actual executable - location then try argv[0]. */ -if (!p) { -p = realpath(argv0, buf); -if (!p) { -return NULL; -} +exec_dir = qemu_exec_dir(argv0); +if (exec_dir == NULL) { +return NULL; } -dir = dirname(p); -dir = dirname(dir); +dir = dirname(exec_dir); max_len = strlen(dir) + MAX(strlen(SHARE_SUFFIX), strlen(BUILD_SUFFIX)) + 1; @@ -137,6 +108,7 @@ char *os_find_datadir(const char *argv0) } } +g_free(exec_dir); return res; } #undef SHARE_SUFFIX diff --git a/os-win32.c b/os-win32.c index 50b7f6f..564d5b4 100644 --- a/os-win32.c +++ b/os-win32.c @@ -86,24 +86,7 @@ void os_setup_early_signal_handling(void) /* Look for support files in the same directory as the executable. */ char *os_find_datadir(const char *argv0) { -char *p; -char buf[MAX_PATH]; -DWORD len; - -len = GetModuleFileName(NULL, buf, sizeof(buf) - 1); -if (len == 0) { -return NULL; -} - -buf[len] = 0; -p = buf + len - 1; -while (p != buf && *p != '\\') -p--; -*p = 0; -if (access(buf, R_OK) == 0) { -return g_strdup(buf); -} -return NULL; +return qemu_exec_dir(argv0); } void os_set_line_buffering(void) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e00a44c..7c1231f 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -54,6 +54,7 @@ extern int daemon(int, int); #include "trace.h" #include "qemu/sockets.h" #include +#include #ifdef CONFIG_LINUX #include @@ -251,3 +252,47 @@ qemu_get_local_state_pathname(const char *relative_pathname) return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, relative_pathname); } + +char *qemu_exec_dir(const char *argv0) +{ +char *dir; +char *p = NULL; +char buf[PATH_MAX]; + +#if defined(__linux__) +{ +int len; +len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); +if (len > 0) { +buf[len] = 0; +p = buf; +} +} +#elif defined(__FreeBSD__) +{ +static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; +size_t len = sizeof(buf) - 1; + +*buf = '\0'; +if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && +*buf) { +buf[sizeof(buf) - 1] = '\0'; +p = buf; +} +} +#endif +/* If we don't have any way of figuring out the actual executable + location then try argv[0]. */ +if (!p) { +if (!argv0) { +return NULL; +} +p = realpath(argv0, buf); +if (!p) { +return NULL; +} +} +dir = dirname(p); + +return g_strdup(dir); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 776ccfa..8605f63 100644 --- a/util/oslib-win32.c +++ b/
[Qemu-devel] [PATCH v17 04/10] block: use per-object cflags and libs
No longer adds flags and libs for them to global variables, instead create config-host.mak variables like FOO_CFLAGS and FOO_LIBS, which is used as per object cflags and libs. This removes unwanted dependencies from libcacard. Signed-off-by: Fam Zheng [Split from Fam's patch to enable modules. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- block/Makefile.objs | 13 - configure | 25 ++--- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/block/Makefile.objs b/block/Makefile.objs index 4e8c91e..a1db63f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -23,4 +23,15 @@ common-obj-y += commit.o common-obj-y += mirror.o common-obj-y += backup.o -$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS) +iscsi.o-cflags := $(LIBISCSI_CFLAGS) +iscsi.o-libs := $(LIBISCSI_LIBS) +curl.o-cflags := $(CURL_CFLAGS) +curl.o-libs:= $(CURL_LIBS) +rbd.o-cflags := $(RBD_CFLAGS) +rbd.o-libs := $(RBD_LIBS) +gluster.o-cflags := $(GLUSTERFS_CFLAGS) +gluster.o-libs := $(GLUSTERFS_LIBS) +ssh.o-cflags := $(LIBSSH2_CFLAGS) +ssh.o-libs := $(LIBSSH2_LIBS) +qcow.o-libs:= -lz +linux-aio.o-libs := -laio diff --git a/configure b/configure index 4990648..9d71867 100755 --- a/configure +++ b/configure @@ -2303,8 +2303,6 @@ EOF curl_libs=`$curlconfig --libs 2>/dev/null` if compile_prog "$curl_cflags" "$curl_libs" ; then curl=yes -libs_tools="$curl_libs $libs_tools" -libs_softmmu="$curl_libs $libs_softmmu" else if test "$curl" = "yes" ; then feature_not_found "curl" @@ -2460,8 +2458,6 @@ EOF rbd_libs="-lrbd -lrados" if compile_prog "" "$rbd_libs" ; then rbd=yes -libs_tools="$rbd_libs $libs_tools" -libs_softmmu="$rbd_libs $libs_softmmu" else if test "$rbd" = "yes" ; then feature_not_found "rados block device" @@ -2478,9 +2474,6 @@ if test "$libssh2" != "no" ; then libssh2_cflags=`$pkg_config libssh2 --cflags` libssh2_libs=`$pkg_config libssh2 --libs` libssh2=yes -libs_tools="$libssh2_libs $libs_tools" -libs_softmmu="$libssh2_libs $libs_softmmu" -QEMU_CFLAGS="$QEMU_CFLAGS $libssh2_cflags" else if test "$libssh2" = "yes" ; then error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2" @@ -2526,8 +2519,6 @@ int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); retu EOF if compile_prog "" "-laio" ; then linux_aio=yes -libs_softmmu="$libs_softmmu -laio" -libs_tools="$libs_tools -laio" else if test "$linux_aio" = "yes" ; then feature_not_found "linux AIO" @@ -2696,9 +2687,6 @@ if test "$glusterfs" != "no" ; then glusterfs="yes" glusterfs_cflags=`$pkg_config --cflags glusterfs-api` glusterfs_libs=`$pkg_config --libs glusterfs-api` -CFLAGS="$CFLAGS $glusterfs_cflags" -libs_tools="$glusterfs_libs $libs_tools" -libs_softmmu="$glusterfs_libs $libs_softmmu" if $pkg_config --atleast-version=5 glusterfs-api; then glusterfs_discard="yes" fi @@ -3066,11 +3054,9 @@ EOF libiscsi="yes" libiscsi_cflags=$($pkg_config --cflags libiscsi) libiscsi_libs=$($pkg_config --libs libiscsi) -CFLAGS="$CFLAGS $libiscsi_cflags" -LIBS="$LIBS $libiscsi_libs" elif compile_prog "" "-liscsi" ; then libiscsi="yes" -LIBS="$LIBS -liscsi" +libiscsi_libs="-liscsi" else if test "$libiscsi" = "yes" ; then feature_not_found "libiscsi" @@ -4068,6 +4054,7 @@ fi if test "$curl" = "yes" ; then echo "CONFIG_CURL=y" >> $config_host_mak echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak + echo "CURL_LIBS=$curl_libs" >> $config_host_mak fi if test "$brlapi" = "yes" ; then echo "CONFIG_BRLAPI=y" >> $config_host_mak @@ -4160,6 +4147,8 @@ if test "$libiscsi" = "yes" ; then if test "$libiscsi_version" = "1.4.0"; then echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak fi + echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak + echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak fi if test "$seccomp" = "yes"; then @@ -4181,6 +4170,8 @@ if test "$qom_cast_debug" = "yes" ; then fi if test "$rbd" = "yes" ; the
[Qemu-devel] [PATCH v17 02/10] rules.mak: fix $(obj) to a real relative path
Makefile.target includes rule.mak and unnested common-obj-y, then prefix them with '../', this will ignore object specific QEMU_CFLAGS in subdir Makefile.objs: $(obj)/curl.o: QEMU_CFLAGS += $(CURL_CFLAGS) Because $(obj) here is './block', instead of '../block'. This doesn't hurt compiling because we basically build all .o from top Makefile, before entering Makefile.target, but it will affact arriving per-object libs support. The starting point of $(obj) is passed in as argument of unnest-vars, as well as nested variables, so that different Makefiles can pass in a right value. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile| 14 ++ Makefile.objs | 17 + Makefile.target | 17 + configure | 1 + rules.mak | 14 +- 5 files changed, 38 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index bdff4e4..423ace5 100644 --- a/Makefile +++ b/Makefile @@ -122,6 +122,16 @@ defconfig: ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/Makefile.objs +endif + +dummy := $(call unnest-vars,, \ +stub-obj-y \ +util-obj-y \ +qga-obj-y \ +block-obj-y \ +common-obj-y) + +ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/tests/Makefile endif ifeq ($(CONFIG_SMARTCARD_NSS),y) @@ -130,6 +140,10 @@ endif all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all +vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) + +vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS) + config-host.h: config-host.h-timestamp config-host.h-timestamp: config-host.mak qemu-options.def: $(SRC_PATH)/qemu-options.hx diff --git a/Makefile.objs b/Makefile.objs index 857bb53..489c4b1 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -41,7 +41,7 @@ libcacard-y += libcacard/vcardt.o # single QEMU executable should support all CPUs and machines. ifeq ($(CONFIG_SOFTMMU),y) -common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/ +common-obj-y = blockdev.o blockdev-nbd.o block/ common-obj-y += net/ common-obj-y += readline.o common-obj-y += qdev-monitor.o device-hotplug.o @@ -112,18 +112,3 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo # by libqemuutil.a. These should be moved to a separate .json schema. qga-obj-y = qga/ qapi-types.o qapi-visit.o qga-vss-dll-obj-y = qga/ - -vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) - -vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS) - -QEMU_CFLAGS+=$(GLIB_CFLAGS) - -nested-vars += \ - stub-obj-y \ - util-obj-y \ - qga-obj-y \ - qga-vss-dll-obj-y \ - block-obj-y \ - common-obj-y -dummy := $(call unnest-vars) diff --git a/Makefile.target b/Makefile.target index af6ac7e..9a6e7dd 100644 --- a/Makefile.target +++ b/Makefile.target @@ -139,13 +139,22 @@ endif # CONFIG_SOFTMMU # Workaround for http://gcc.gnu.org/PR55489, see configure. %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS) -nested-vars += obj-y +dummy := $(call unnest-vars,,obj-y) -# This resolves all nested paths, so it must come last +# we are making another call to unnest-vars with different vars, protect obj-y, +# it can be overriden in subdir Makefile.objs +obj-y-save := $(obj-y) + +block-obj-y := +common-obj-y := include $(SRC_PATH)/Makefile.objs +dummy := $(call unnest-vars,..,block-obj-y common-obj-y) + +# Now restore obj-y +obj-y := $(obj-y-save) -all-obj-y = $(obj-y) -all-obj-y += $(addprefix ../, $(common-obj-y)) +all-obj-y = $(obj-y) $(common-obj-y) +all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) ifndef CONFIG_HAIKU LIBS+=-lm diff --git a/configure b/configure index 3782a6a..4990648 100755 --- a/configure +++ b/configure @@ -2345,6 +2345,7 @@ fi if $pkg_config --atleast-version=$glib_req_ver gthread-2.0; then glib_cflags=`$pkg_config --cflags gthread-2.0` glib_libs=`$pkg_config --libs gthread-2.0` +CFLAGS="$glib_cflags $CFLAGS" LIBS="$glib_libs $LIBS" libs_qga="$glib_libs $libs_qga" else diff --git a/rules.mak b/rules.mak index 49edb9b..7d27602 100644 --- a/rules.mak +++ b/rules.mak @@ -138,9 +138,6 @@ clean: clean-timestamp # magic to descend into other directories -obj := . -old-nested-dirs := - define push-var $(eval save-$2-$1 = $(value $1)) $(eval $1 :=) @@ -154,9 +151,11 @@ endef define unnest-dir $(foreach var,$(nested-vars),$(call push-var,$(var),$1/)) -$(eval obj := $(obj)/$1) +$(eval obj-parent-$1 := $(obj)) +$(eval obj := $(if $(obj),$(obj)/$1,$1)) $(eval include $(SRC_PATH)/$1/Makefile.objs) -$(eval obj := $(patsubst %/$1,%,$(obj))) +$(eval obj := $(obj-parent-$1)) +$(eval obj-parent-$1 := ) $(foreach var,$(nested-vars),$(call pop-var,$(var),$1/)) endef @@ -171,7 +170,12 @@ $(if $(nested-dirs), endef define unnest-vars +$(eval obj := $1) +$(eval nested-vars := $2) +$(eval old-nested-dirs := ) $(call unnest-vars-1) +$(if $1,$(foreach v,$(nested-vars),$(eval \ + $v := $(addprefix $1/,$($v) $(for
[Qemu-devel] [PATCH v17 05/10] darwin: do not use -mdynamic-no-pic
From: Paolo Bonzini While -mdynamic-no-pic can speed up the code somewhat, it is only used on the legacy PowerPC Mac OS X, and I am not sure if anyone is still testing that. Disabling PIC can cause problems when enabling modules, so do not do that. Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- configure | 2 -- 1 file changed, 2 deletions(-) diff --git a/configure b/configure index 9d71867..99434e6 100755 --- a/configure +++ b/configure @@ -516,8 +516,6 @@ Darwin) if [ "$cpu" = "x86_64" ] ; then QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" LDFLAGS="-arch x86_64 $LDFLAGS" - else -QEMU_CFLAGS="-mdynamic-no-pic $QEMU_CFLAGS" fi cocoa="yes" audio_drv_list="coreaudio" -- 1.8.5.3
[Qemu-devel] [PATCH v17 07/10] module: implement module loading
This patch adds loading, stamp checking and initialization of modules. The init function of dynamic module is no longer directly called as __attribute__((constructor)) in static linked version, it is called only after passed the checking of presense of stamp symbol: qemu_stamp_$RELEASEHASH where $RELEASEHASH is generated by hashing version strings and content of configure script. With this, modules built from a different tree/version/configure will not be loaded. The module loading code requires gmodule-2.0. Modules are searched under - CONFIG_MODDIR - executable folder (to allow running qemu-{img,io} in the build directory) - ../ of executable folder (to allow running system emulator in the build directory) Modules are linked under their subdir respectively, then copied to top level of build directory for above convinience, e.g.: $(BUILD_DIR)/block/curl.so -> $(BUILD_DIR)/block-curl.so Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- Makefile | 3 ++ configure | 49 - include/qemu/module.h | 18 ++- module-common.c | 10 rules.mak | 10 ++-- scripts/create_config | 14 + util/module.c | 145 +- 7 files changed, 229 insertions(+), 20 deletions(-) create mode 100644 module-common.c diff --git a/Makefile b/Makefile index 9de66cb..670ce44 100644 --- a/Makefile +++ b/Makefile @@ -203,6 +203,9 @@ Makefile: $(version-obj-y) $(version-lobj-y) libqemustub.a: $(stub-obj-y) libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o +block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL +util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)' + ## qemu-img.o: qemu-img-cmds.h diff --git a/configure b/configure index a6a626f..0da1253 100755 --- a/configure +++ b/configure @@ -677,7 +677,8 @@ for opt do ;; --disable-debug-info) ;; - --enable-modules) modules="yes" + --enable-modules) + modules="yes" ;; --cpu=*) ;; @@ -1130,7 +1131,7 @@ Advanced options (experts only): --libdir=PATHinstall libraries in PATH --sysconfdir=PATHinstall config in PATH$confsuffix --localstatedir=PATH install local state in PATH (set at runtime on win32) - --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix] + --with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix] --enable-modules enable modules support --enable-debug-tcg enable TCG debugging --disable-debug-tcg disable TCG debugging (default) @@ -2346,14 +2347,32 @@ if test "$mingw32" = yes; then else glib_req_ver=2.12 fi -if $pkg_config --atleast-version=$glib_req_ver gthread-2.0; then -glib_cflags=`$pkg_config --cflags gthread-2.0` -glib_libs=`$pkg_config --libs gthread-2.0` -CFLAGS="$glib_cflags $CFLAGS" -LIBS="$glib_libs $LIBS" -libs_qga="$glib_libs $libs_qga" -else -error_exit "glib-$glib_req_ver required to compile QEMU" + +for i in gthread-2.0 gmodule-2.0; do +if $pkg_config --atleast-version=$glib_req_ver $i; then +glib_cflags=`$pkg_config --cflags $i` +glib_libs=`$pkg_config --libs $i` +CFLAGS="$glib_cflags $CFLAGS" +LIBS="$glib_libs $LIBS" +libs_qga="$glib_libs $libs_qga" +else +error_exit "glib-$glib_req_ver $i is required to compile QEMU" +fi +done + +## +# SHA command probe for modules +if test "$modules" = yes; then +shacmd_probe="sha1sum sha1 shasum" +for c in $shacmd_probe; do +if which $c &>/dev/null; then +shacmd="$c" +break +fi +done +if test "$shacmd" = ""; then +error_exit "one of the checksum commands is required to enable modules: $shacmd_probe" +fi fi ## @@ -3628,6 +3647,7 @@ if test "$mingw32" = "yes" ; then fi qemu_confdir=$sysconfdir$confsuffix +moddir=$libdir$confsuffix qemu_datadir=$datadir$confsuffix qemu_localedir="$datadir/locale" @@ -3718,6 +3738,7 @@ echo "Install prefix$prefix" echo "BIOS directory`eval echo $qemu_datadir`" echo "binary directory `eval echo $bindir`" echo "library directory `eval echo $libdir`" +echo "module directory `eval echo $moddir`" echo "libexec directory `eval echo $libexecdir`" echo "include directory `eval echo $includedir`" echo "config directory `eval echo $sysconfdir`" @@ -3849,6 +3870,7 @@ echo all: >&
[Qemu-devel] [PATCH v17 06/10] build-sys: introduce common-obj-m and block-obj-m for DSO
Add necessary rules and flags for shared object generation. $(common-obj-m) will include $(block-obj-m), like $(common-obj-y) does for $(block-obj-y). The new rules introduced here are: 0) For all %.so compiling: QEMU_CFLAGS += -fPIC 1) %.o in $(common-obj-m) is compiled to %.o, then linked to %.so. 2) %.mo in $(common-obj-m) is the placeholder for %.so for pattern matching in Makefile. It's linked to "-shared" with all its dependencies (multiple *.o) as input. Which means the list of depended objects must be specified in each sub-Makefile.objs: foo.mo-objs := bar.o baz.o qux.o in the same style with foo.o-cflags and foo.o-libs. The objects here will be prefixed with "$(obj)/" if it's a subdirectory Makefile.objs. Also introduce --enable-modules in configure, the option will enable support of shared object build. Otherwise objects are static linked to executables. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile| 9 +++-- Makefile.objs | 2 ++ Makefile.target | 6 +- configure | 14 ++ rules.mak | 54 +- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 423ace5..9de66cb 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,9 @@ dummy := $(call unnest-vars,, \ util-obj-y \ qga-obj-y \ block-obj-y \ -common-obj-y) +block-obj-m \ +common-obj-y \ +common-obj-m) ifneq ($(wildcard config-host.mak),) include $(SRC_PATH)/tests/Makefile @@ -138,7 +140,7 @@ ifeq ($(CONFIG_SMARTCARD_NSS),y) include $(SRC_PATH)/libcacard/Makefile endif -all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all +all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) @@ -256,6 +258,9 @@ clean: rm -f qemu-options.def find . -name '*.[oda]' -type f -exec rm -f {} + find . -name '*.l[oa]' -type f -exec rm -f {} + + find . -name '*.so' -type f -exec rm -f {} + + find . -name '*.mo' -type f -exec rm -f {} + + find . -name '*.dll' -type f -exec rm -f {} + rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~ rm -f fsdev/*.pod rm -rf .libs */.libs diff --git a/Makefile.objs b/Makefile.objs index 489c4b1..56f256f 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -19,6 +19,8 @@ block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o block-obj-y += qemu-coroutine-sleep.o block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o +block-obj-m = block/ + ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy) # Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add. # only pull in the actual virtio-9p device if we also enabled virtio. diff --git a/Makefile.target b/Makefile.target index 9a6e7dd..3945260 100644 --- a/Makefile.target +++ b/Makefile.target @@ -148,7 +148,11 @@ obj-y-save := $(obj-y) block-obj-y := common-obj-y := include $(SRC_PATH)/Makefile.objs -dummy := $(call unnest-vars,..,block-obj-y common-obj-y) +dummy := $(call unnest-vars,.., \ + block-obj-y \ + block-obj-m \ + common-obj-y \ + common-obj-m) # Now restore obj-y obj-y := $(obj-y-save) diff --git a/configure b/configure index 99434e6..a6a626f 100755 --- a/configure +++ b/configure @@ -205,6 +205,9 @@ mingw32="no" gcov="no" gcov_tool="gcov" EXESUF="" +DSOSUF=".so" +LDFLAGS_SHARED="-shared" +modules="no" prefix="/usr/local" mandir="\${prefix}/share/man" datadir="\${prefix}/share" @@ -513,6 +516,7 @@ OpenBSD) Darwin) bsd="yes" darwin="yes" + LDFLAGS_SHARED="-bundle -undefined dynamic_lookup" if [ "$cpu" = "x86_64" ] ; then QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS" LDFLAGS="-arch x86_64 $LDFLAGS" @@ -606,6 +610,7 @@ fi if test "$mingw32" = "yes" ; then EXESUF=".exe" + DSOSUF=".dll" QEMU_CFLAGS="-DWIN32_LEAN_AND_MEAN -DWINVER=0x501 $QEMU_CFLAGS" # enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later) QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS" @@ -672,6 +677,8 @@ for opt do ;; --disable-debug-info) ;; + --enable-modules) modules="yes" + ;; --cpu=*) ;; --target-list=*) target_list="$optarg" @@ -1124,6 +1131,7 @@ Advanced options (experts only): --sysconfdir=PATHinstall config in PATH$confsuffix --localstatedir=PATH install local state in PATH (set at runtime on win32) --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir
[Qemu-devel] [PATCH v17 08/10] Makefile: install modules with "make install"
Install all the modules to ${MODDIR}. Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- Makefile | 6 ++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 670ce44..a91f119 100644 --- a/Makefile +++ b/Makefile @@ -371,6 +371,12 @@ install-datadir install-localstatedir ifneq ($(TOOLS),) $(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)" endif +ifneq ($(CONFIG_MODULES),) + $(INSTALL_DIR) "$(DESTDIR)$(moddir)" + for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \ + $(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(moddir)/$${s//\//-}"; \ + done +endif ifneq ($(HELPERS-y),) $(INSTALL_DIR) "$(DESTDIR)$(libexecdir)" $(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)" -- 1.8.5.3
[Qemu-devel] [PATCH v17 09/10] .gitignore: ignore module related files (dll, so, mo)
Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1c9d63d..7702b0c 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,9 @@ fsdev/virtfs-proxy-helper.pod *.cp *.dvi *.exe +*.dll +*.so +*.mo *.fn *.ky *.log -- 1.8.5.3
[Qemu-devel] [PATCH v17 10/10] block: convert block drivers linked with libs to modules
The converted block drivers are: curl iscsi rbd ssh glusterfs Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini Signed-off-by: Fam Zheng --- configure | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure b/configure index 0da1253..2234fe8 100755 --- a/configure +++ b/configure @@ -4085,7 +4085,7 @@ if test "$bswap_h" = "yes" ; then echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak fi if test "$curl" = "yes" ; then - echo "CONFIG_CURL=y" >> $config_host_mak + echo "CONFIG_CURL=m" >> $config_host_mak echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak echo "CURL_LIBS=$curl_libs" >> $config_host_mak fi @@ -4176,7 +4176,7 @@ if test "$glx" = "yes" ; then fi if test "$libiscsi" = "yes" ; then - echo "CONFIG_LIBISCSI=y" >> $config_host_mak + echo "CONFIG_LIBISCSI=m" >> $config_host_mak if test "$libiscsi_version" = "1.4.0"; then echo "CONFIG_LIBISCSI_1_4=y" >> $config_host_mak fi @@ -4202,7 +4202,7 @@ if test "$qom_cast_debug" = "yes" ; then echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak fi if test "$rbd" = "yes" ; then - echo "CONFIG_RBD=y" >> $config_host_mak + echo "CONFIG_RBD=m" >> $config_host_mak echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak echo "RBD_LIBS=$rbd_libs" >> $config_host_mak fi @@ -4247,7 +4247,7 @@ if test "$getauxval" = "yes" ; then fi if test "$glusterfs" = "yes" ; then - echo "CONFIG_GLUSTERFS=y" >> $config_host_mak + echo "CONFIG_GLUSTERFS=m" >> $config_host_mak echo "GLUSTERFS_CFLAGS=$glusterfs_cflags" >> $config_host_mak echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak fi @@ -4257,7 +4257,7 @@ if test "$glusterfs_discard" = "yes" ; then fi if test "$libssh2" = "yes" ; then - echo "CONFIG_LIBSSH2=y" >> $config_host_mak + echo "CONFIG_LIBSSH2=m" >> $config_host_mak echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak fi -- 1.8.5.3
Re: [Qemu-devel] [PATCH v16 2/9] rules.mak: allow per object cflags and libs
On Wed, 01/15 19:35, Peter Maydell wrote: > On 15 January 2014 08:48, Fam Zheng wrote: > > Adds extract-libs in LINK to expand any "per object libs", the syntax to > > define > > such a libs options is like: > > > > foo.o-libs := $(CURL_LIBS) > > > > in block/Makefile.objs. > > > > Similarly, > > > > foo.o-cflags := $(FOO_CFLAGS) > > > > is also supported. > > > > "foo.o" must be listed in a nested var (e.g. common-obj-y) to make the > > option variables effective. > > > > Signed-off-by: Fam Zheng > > Signed-off-by: Paolo Bonzini > > --- > > rules.mak | 19 --- > > 1 file changed, 16 insertions(+), 3 deletions(-) > > > > diff --git a/rules.mak b/rules.mak > > index 7d27602..9398268 100644 > > --- a/rules.mak > > +++ b/rules.mak > > @@ -21,15 +21,17 @@ QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(*D)/$(*F).d > > # Same as -I$(SRC_PATH) -I., but for the nested source/object directories > > QEMU_INCLUDES += -I$( > > > +extract-libs = $(strip $(foreach o,$1,$($o-libs))) > > + > > %.o: %.c > > - $(call quiet-command,$(CC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) > > $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<," CC$(TARGET_DIR)$@") > > + $(call quiet-command,$(CC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) > > $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<," CC > > $(TARGET_DIR)$@") > > Shouldn't this change also be made in the rules for compiling > .cpp and .m files (which also use CFLAGS and could benefit > from per-object flags) ? > > (For instance it would be nice if I could compile the libvixl > C++ sources with a different set of flags.) > Sounds reasonable. As I've already sent v17 without this change, I'll remember to add this in later revisions or follow-up patches. Fam
Re: [Qemu-devel] [PATCH v16 2/9] rules.mak: allow per object cflags and libs
On Thu, 01/16 11:04, Peter Maydell wrote: > On 15 January 2014 08:48, Fam Zheng wrote:.objs. > > Similarly, > > > > foo.o-cflags := $(FOO_CFLAGS) > > > > is also supported. > > I noticed that we already support per-object cflags via: > > $(obj)/adlib.o $(obj)/fmopl.o: QEMU_CFLAGS += -DBUILD_Y8950=0 > > (this example from audio/Makefile.objs). Is your new > method better in some way? Are we going to convert > users of the old-style method? > I added this new method as it's cleaner to use, especially when defining multiple-object module (implemented in this series): foo.mo-objs := bar.o biz.o qux.o , compared to $(obj)/foo.mo-objs := $(obj)/bar.o $(obj)/biz.o $(obj)/qux.o or slightly better $(obj)/foo.mo-objs := $(addprefix $(obj), bar.o, biz.o, qux.o) I've already converted block/curl.o's cflags to this, later in this series. So yes I think this is a better syntax. Thanks, Fam
Re: [Qemu-devel] [PATCH v2 13/24] block: Introduce bdrv_co_do_pwritev()
On Thu, 01/16 13:25, Kevin Wolf wrote: > Am 13.12.2013 um 14:22 hat Kevin Wolf geschrieben: > > This is going to become the bdrv_co_do_preadv() equivalent for writes. > > In this patch, however, just a function taking byte offsets is created, > > it doesn't align anything yet. > > > > Signed-off-by: Kevin Wolf > > --- > > block.c | 23 +-- > > 1 file changed, 17 insertions(+), 6 deletions(-) > > > > diff --git a/block.c b/block.c > > index 385fb8a..a80db2e 100644 > > --- a/block.c > > +++ b/block.c > > @@ -3010,8 +3010,8 @@ static int coroutine_fn > > bdrv_aligned_pwritev(BlockDriverState *bs, > > /* > > * Handle a write request in coroutine context > > */ > > -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, > > -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, > > +static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, > > +int64_t offset, unsigned int bytes, QEMUIOVector *qiov, > > BdrvRequestFlags flags) > > { > > int ret; > > @@ -3022,21 +3022,32 @@ static int coroutine_fn > > bdrv_co_do_writev(BlockDriverState *bs, > > if (bs->read_only) { > > return -EACCES; > > } > > -if (bdrv_check_request(bs, sector_num, nb_sectors)) { > > +if (bdrv_check_byte_request(bs, offset, bytes)) { > > return -EIO; > > } > > > > /* throttling disk I/O */ > > if (bs->io_limits_enabled) { > > -bdrv_io_limits_intercept(bs, nb_sectors, true); > > +bdrv_io_limits_intercept(bs, bytes << BDRV_SECTOR_BITS, true); > > } > > Oh nice, this shifts in the wrong direction. > > If somebody feels like writing a test case, something testing that I/O > throttling actually throttles would be useful... > Good idea. Should be possible for a basic test in qemu-iotests. I'll give it a try. Thanks, Fam
Re: [Qemu-devel] [PATCH 0/9] QMP: Introduce incremental drive-backup with in-memory dirty bitmap
On Fri, 01/17 17:25, Stefan Hajnoczi wrote: > On Mon, Jan 13, 2014 at 06:39:39PM +0800, Fam Zheng wrote: > > This implements incremental backup. > > > > A few new QMP commands related to dirty bitmap are added: > > > > dirty-bitmap-add * > > dirty-bitmap-disable * > > dirty-bitmap-remove > > > > (*: also supported as transactions) > > > > As their name implies, they manipulate a block device's dirty bitmap. This > > doesn't interfere with dirty bitmap used for migration, backup, mirror, etc, > > which don't have a name and are invisible to user. Only named bitmaps > > (created > > by dirty-bitmap-add) can be disabled/removed by user. > > > > They are added to support "user controlled write tracking", so as to > > determine > > the range of date for incremental backup. > > > > A new sync mode for drive-backup is introduced: > > > > drive-backup device=.. mode=.. sync=dirty-bitmap bitmap=bitmap0 > > > > Which will scan dirty bitmap "bitmap0" and only copy all dirty sectors to > > target. > > > > Now, let's see the usage with a simple example: > > > > # Start the guest > > vm = VM() > > vm.start() > > > > # Fake some guest writes with "qemu-io", this is before creating dirty > > # bitmap, so it won't be copied > > vm.hmp('qemu-io ide0-hd0 "write -P 0xa 512k 1M"') > > > > # Create a dirty bitmap to track writes > > vm.qmp("dirty-bitmap-add", device="ide0-hd0", name="dirty-0") > > > > # Fake some more guest writes with "qemu-io", this will be copied > > vm.hmp('qemu-io ide0-hd0 "write -P 0xa 512M 1M"') > > > > # Now "disable" the first dirty bitmap, do the backup according to it, > > # at meantime continue to track dirty with a new dirty bitmap > > vm.qmp("transaction", actions=[ > > { > > 'type': 'dirty-bitmap-disable', 'data': { > > 'device': 'ide0-hd0', > > 'name': 'dirty-0' > > } > > }, { > > 'type': 'dirty-bitmap-add', 'data': { > > 'device': 'ide0-hd0', > > 'name': 'dirty-1' > > } > > }, { > > 'type': 'drive-backup', 'data': { > > 'device': 'ide0-hd0', > > 'target': '/tmp/incre.qcow2', > > 'bitmap': 'dirty-0', > > 'sync': 'dirty-bitmap' > > } > > } > > ]) > > > > # Once backup job started, the dirty bitmap can be removed (actually > > only > > # hidden from user since it is still referenced by block job > > vm.qmp("dirty-bitmap-remove", device="ide0-hd0", name="dirty-0") > > I'm interested in the lifecycle of a dirty bitmap (but haven't reviewed > the patches yet). In particular, what happens if a bitmap is added to > more than one drive? Is there a more elegant way to handle the disable, > drive-backup, remove step (we only need to explicitly disable because we > still need the bitmap name for the drive-backup command)? Also what > happens if we add the bitmap again after disabling? A same name on that device can't be used again unless it's removed. A bitmap is associated to (and only) one device, it can't be shared. > > No need to answer all these questions, but it suggests the interface > exposes a bit of complexity. Maybe it's possible to make it simpler and > easier to use? > At least the user has to explicitly start tracking, that's the dirty-bitmap-add step. Alternatively, we can have "disable, drive-backup, remove" step simplified as: drive-backup sync=dirty-bitmap bitmap=dirty0 reset-bitmap=true where backup job copy out the dirty bitmap (and clears it, as reset-bitmap is true), and backup with it atomically. Of course it doesn't have to actually copy the whole bitmap: it just makes the old one anonymous, create a new empty one and give it the same name. When backup is done, the old bitmap is removed. What do you think? Fam > > P.S. Persistent dirty bitmap could be built on top of this series,
Re: [Qemu-devel] [RFC v2 1/6] rfifolock: add recursive FIFO lock
On Fri, 01/10 09:45, Stefan Hajnoczi wrote: > QemuMutex does not guarantee fairness and cannot be acquired > recursively: > > Fairness means each locker gets a turn and the scheduler cannot cause > starvation. > > Recursive locking is useful for composition, it allows a sequence of > locking operations to be invoked atomically by acquiring the lock around > them. > > This patch adds RFifoLock, a recursive lock that guarantees FIFO order. > Its first user is added in the next patch. > > RFifoLock has one additional feature: it can be initialized with an > optional contention callback. The callback is invoked whenever a thread > must wait for the lock. For example, it can be used to poke the current > owner so that they release the lock soon. > Is it better to make the contention callback per-caller than per-lock? Considering that different caller may want to do different things depending on current code path. Fam > Signed-off-by: Stefan Hajnoczi > --- > include/qemu/rfifolock.h | 54 + > tests/Makefile | 2 ++ > tests/test-rfifolock.c | 90 > > util/Makefile.objs | 1 + > util/rfifolock.c | 78 + > 5 files changed, 225 insertions(+) > create mode 100644 include/qemu/rfifolock.h > create mode 100644 tests/test-rfifolock.c > create mode 100644 util/rfifolock.c > > diff --git a/include/qemu/rfifolock.h b/include/qemu/rfifolock.h > new file mode 100644 > index 000..b23ab53 > --- /dev/null > +++ b/include/qemu/rfifolock.h > @@ -0,0 +1,54 @@ > +/* > + * Recursive FIFO lock > + * > + * Copyright Red Hat, Inc. 2013 > + * > + * Authors: > + * Stefan Hajnoczi > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef QEMU_RFIFOLOCK_H > +#define QEMU_RFIFOLOCK_H > + > +#include "qemu/thread.h" > + > +/* Recursive FIFO lock > + * > + * This lock provides more features than a plain mutex: > + * > + * 1. Fairness - enforces FIFO order. > + * 2. Nesting - can be taken recursively. > + * 3. Contention callback - optional, called when thread must wait. > + * > + * The recursive FIFO lock is heavyweight so prefer other synchronization > + * primitives if you do not need its features. > + */ > +typedef struct { > +QemuMutex lock; /* protects all fields */ > + > +/* FIFO order */ > +unsigned int head; /* active ticket number */ > +unsigned int tail; /* waiting ticket number */ > +QemuCond cond; /* used to wait for our ticket number */ > + > +/* Nesting */ > +QemuThread owner_thread;/* thread that currently has ownership */ > +unsigned int nesting; /* amount of nesting levels */ > + > +/* Contention callback */ > +void (*cb)(void *); /* called when thread must wait, with ->lock > + * held so it may not recursively lock/unlock > + */ > +void *cb_opaque; > +} RFifoLock; > + > +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque); > +void rfifolock_destroy(RFifoLock *r); > +void rfifolock_lock(RFifoLock *r); > +void rfifolock_unlock(RFifoLock *r); > + > +#endif /* QEMU_RFIFOLOCK_H */ > diff --git a/tests/Makefile b/tests/Makefile > index 0b85a34..b1b3686 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -31,6 +31,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF) > check-unit-y += tests/test-iov$(EXESUF) > gcov-files-test-iov-y = util/iov.c > check-unit-y += tests/test-aio$(EXESUF) > +check-unit-y += tests/test-rfifolock$(EXESUF) > check-unit-y += tests/test-throttle$(EXESUF) > gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c > gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c > @@ -153,6 +154,7 @@ tests/check-qjson$(EXESUF): tests/check-qjson.o > libqemuutil.a libqemustub.a > tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o > $(qom-core-obj) libqemuutil.a > tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) > libqemuutil.a libqemustub.a > tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a > libqemustub.a > +tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o libqemuutil.a > libqemustub.a > tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) > libqemuutil.a libqemustub.a > tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) > libqemuutil.a libqemustub.a > tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a > diff --git a/tests/test-rfifolock.c b/tests/test-rfifolock.c > new file mode 100644 > index 000..440dbcb > --- /dev/null > +++ b/tests/test-rfifolock.c > @@ -0,0 +1,90 @@ > +/* > + * RFifoLock tests > + * > + * Copyright Red Hat, Inc. 2013 > + * > + * Authors: > + * Stefan Hajnoczi > + * > + * This work is licensed under the terms
Re: [Qemu-devel] [RFC v2 2/6] aio: add aio_context_acquire() and aio_context_release()
On Fri, 01/10 09:45, Stefan Hajnoczi wrote: > It can be useful to run an AioContext from a thread which normally does > not "own" the AioContext. For example, request draining can be > implemented by acquiring the AioContext and looping aio_poll() until all > requests have been completed. > > The following pattern should work: > > /* Event loop thread */ > while (running) { > aio_context_acquire(ctx); > aio_poll(ctx, true); > aio_context_release(ctx); > } > > /* Another thread */ > aio_context_acquire(ctx); > bdrv_read(bs, 0x1000, buf, 1); > aio_context_release(ctx); > > This patch implements aio_context_acquire() and aio_context_release(). > > Note that existing aio_poll() callers do not need to worry about > acquiring and releasing - it is only needed when multiple threads will > call aio_poll() on the same AioContext. > > Signed-off-by: Stefan Hajnoczi > --- > async.c | 18 + > include/block/aio.h | 18 + > tests/test-aio.c| 58 > + > 3 files changed, 94 insertions(+) > > diff --git a/async.c b/async.c > index 5fb3fa6..6930185 100644 > --- a/async.c > +++ b/async.c > @@ -214,6 +214,7 @@ aio_ctx_finalize(GSource *source) > thread_pool_free(ctx->thread_pool); > aio_set_event_notifier(ctx, &ctx->notifier, NULL); > event_notifier_cleanup(&ctx->notifier); > +rfifolock_destroy(&ctx->lock); > qemu_mutex_destroy(&ctx->bh_lock); > g_array_free(ctx->pollfds, TRUE); > timerlistgroup_deinit(&ctx->tlg); > @@ -250,6 +251,12 @@ static void aio_timerlist_notify(void *opaque) > aio_notify(opaque); > } > > +static void aio_rfifolock_cb(void *opaque) > +{ > +/* Kick owner thread in case they are blocked in aio_poll() */ > +aio_notify(opaque); > +} > + > AioContext *aio_context_new(void) > { > AioContext *ctx; > @@ -257,6 +264,7 @@ AioContext *aio_context_new(void) > ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); > ctx->thread_pool = NULL; > qemu_mutex_init(&ctx->bh_lock); > +rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); > event_notifier_init(&ctx->notifier, false); > aio_set_event_notifier(ctx, &ctx->notifier, > (EventNotifierHandler *) > @@ -275,3 +283,13 @@ void aio_context_unref(AioContext *ctx) > { > g_source_unref(&ctx->source); > } > + > +void aio_context_acquire(AioContext *ctx) > +{ > +rfifolock_lock(&ctx->lock); > +} > + > +void aio_context_release(AioContext *ctx) > +{ > +rfifolock_unlock(&ctx->lock); > +} > diff --git a/include/block/aio.h b/include/block/aio.h > index 2efdf41..4aaa5d5 100644 > --- a/include/block/aio.h > +++ b/include/block/aio.h > @@ -19,6 +19,7 @@ > #include "qemu/queue.h" > #include "qemu/event_notifier.h" > #include "qemu/thread.h" > +#include "qemu/rfifolock.h" > #include "qemu/timer.h" > > typedef struct BlockDriverAIOCB BlockDriverAIOCB; > @@ -47,6 +48,9 @@ typedef void IOHandler(void *opaque); > struct AioContext { > GSource source; > > +/* Protects all fields from multi-threaded access */ > +RFifoLock lock; > + > /* The list of registered AIO handlers */ > QLIST_HEAD(, AioHandler) aio_handlers; > > @@ -104,6 +108,20 @@ void aio_context_ref(AioContext *ctx); > */ > void aio_context_unref(AioContext *ctx); > > +/* Take ownership of the AioContext. If the AioContext will be shared > between > + * threads, a thread must have ownership when calling aio_poll(). > + * > + * Note that multiple threads calling aio_poll() means timers, BHs, and > + * callbacks may be invoked from a different thread than they were registered > + * from. Therefore, code must use AioContext acquire/release or use > + * fine-grained synchronization to protect shared state if other threads will > + * be accessing it simultaneously. > + */ > +void aio_context_acquire(AioContext *ctx); > + > +/* Reliquinish ownership of the AioContext. */ s/Reliquinish/Relinquish/ Fam > +void aio_context_release(AioContext *ctx); > + > /** > * aio_bh_new: Allocate a new bottom half structure. > * > diff --git a/tests/test-aio.c b/tests/test-aio.c > index 592721e..d384b0b 100644 > --- a/tests/test-aio.c > +++ b/tests/test-aio.c > @@ -112,6 +112,63 @@ static void test_notify(void) > g_assert(!aio_poll(ctx, false)); > } > > +typedef struct { > +QemuMutex start_lock; > +bool thread_acquired; > +} AcquireTestData; > + > +static void *test_acquire_thread(void *opaque) > +{ > +AcquireTestData *data = opaque; > + > +/* Wait for other thread to let us start */ > +qemu_mutex_lock(&data->start_lock); > +qemu_mutex_unlock(&data->start_lock); > + > +aio_context_acquire(ctx); > +aio_context_release(ctx); > + > +data->thread_acquired = true; /* success, we got here */ > + > +return NULL; > +} > + > +static void dummy_notifier_read(EventNotifier *unused) > +{ > +
Re: [Qemu-devel] [PATCH V5 1/7] block: Add bs->node_name to hold the name of a bs node of the bs graph.
char *node_name) > +{ > +BlockDriverState *bs; > + > +assert(node_name); > + > +QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { > +if (!strcmp(node_name, bs->node_name)) { > +return bs; > +} > +} > +return NULL; > +} > + > BlockDriverState *bdrv_next(BlockDriverState *bs) > { > if (!bs) { > return QTAILQ_FIRST(&bdrv_states); > } > -return QTAILQ_NEXT(bs, list); > +return QTAILQ_NEXT(bs, device_list); > } > > void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void > *opaque) > { > BlockDriverState *bs; > > -QTAILQ_FOREACH(bs, &bdrv_states, list) { > +QTAILQ_FOREACH(bs, &bdrv_states, device_list) { > it(opaque, bs); > } > } > @@ -3154,7 +3183,7 @@ int bdrv_flush_all(void) > BlockDriverState *bs; > int result = 0; > > -QTAILQ_FOREACH(bs, &bdrv_states, list) { > +QTAILQ_FOREACH(bs, &bdrv_states, device_list) { > int ret = bdrv_flush(bs); > if (ret < 0 && !result) { > result = ret; > @@ -4278,7 +4307,7 @@ void bdrv_invalidate_cache_all(void) > { > BlockDriverState *bs; > > -QTAILQ_FOREACH(bs, &bdrv_states, list) { > +QTAILQ_FOREACH(bs, &bdrv_states, device_list) { > bdrv_invalidate_cache(bs); > } > } > @@ -4287,7 +4316,7 @@ void bdrv_clear_incoming_migration_all(void) > { > BlockDriverState *bs; > > -QTAILQ_FOREACH(bs, &bdrv_states, list) { > +QTAILQ_FOREACH(bs, &bdrv_states, device_list) { > bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); > } > } > diff --git a/include/block/block.h b/include/block/block.h > index 36efaea..834abf9 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -374,6 +374,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked); > void bdrv_eject(BlockDriverState *bs, bool eject_flag); > const char *bdrv_get_format_name(BlockDriverState *bs); > BlockDriverState *bdrv_find(const char *name); > +BlockDriverState *bdrv_find_node(const char *node_name); > BlockDriverState *bdrv_next(BlockDriverState *bs); > void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), >void *opaque); > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 8b132d7..bd5220f 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -325,11 +325,18 @@ struct BlockDriverState { > BlockdevOnError on_read_error, on_write_error; > bool iostatus_enabled; > BlockDeviceIoStatus iostatus; > + > +/* the following member gives a name to every node on the bs graph. */ > +char node_name[32]; > +/* element of the list of named nodes building the graph */ > +QTAILQ_ENTRY(BlockDriverState) node_list; > +/* Device name is the name associated with the "drive" the guest sees */ > char device_name[32]; > +/* element of the list of "drives" the guest sees */ > +QTAILQ_ENTRY(BlockDriverState) device_list; > QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; > int refcnt; > int in_use; /* users other than guest access, eg. block migration */ > -QTAILQ_ENTRY(BlockDriverState) list; > > QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; > > -- > 1.8.3.2 > > Reviewed-by: Fam Zheng
Re: [Qemu-devel] [PATCH V5 2/7] block: Allow the user to define "node-name" option.
On Thu, 12/12 16:33, Benoît Canet wrote: > Signed-off-by: Benoit Canet > --- > block.c | 38 ++ > 1 file changed, 38 insertions(+) > > diff --git a/block.c b/block.c > index 481d566..1c57f0d 100644 > --- a/block.c > +++ b/block.c > @@ -735,6 +735,39 @@ static int bdrv_open_flags(BlockDriverState *bs, int > flags) > return open_flags; > } > > +static int bdrv_get_node_name(BlockDriverState *bs, > + QDict *options, > + Error **errp) This function actually assigns the node-name to bs, could you call it "bdrv_set_node_name" or "bdrv_assign_node_name", and only pass in the node-name and move the parsing of options to the caller? Fam > +{ > +const char *node_name = NULL; > + > +node_name = qdict_get_try_str(options, "node-name"); > + > +if (!node_name) { > +return 0; > +} > + > +/* empty string node name is invalid */ > +if (node_name[0] == '\0') { > +error_setg(errp, "Empty node name"); > +return -EINVAL; > +} > + > +/* takes care of avoiding duplicates node names */ > +if (bdrv_find_node(node_name)) { > +error_setg(errp, "Duplicate node name"); > +return -EINVAL; > +} > + > +/* copy node name into the bs and insert it into the graph list */ > +pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); > +QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); > + > +qdict_del(options, "node-name"); > + > +return 0; > +} > + > /* > * Common part for opening disk images and files > * > @@ -759,6 +792,11 @@ static int bdrv_open_common(BlockDriverState *bs, > BlockDriverState *file, > > trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); > > +ret = bdrv_get_node_name(bs, options, errp); > +if (ret < 0) { > +return ret; > +} > + > /* bdrv_open() with directly using a protocol as drv. This layer is > already > * opened, so assign it to bs (while file becomes a closed > BlockDriverState) > * and return immediately. */ > -- > 1.8.3.2 > >
Re: [Qemu-devel] [PATCH V5 3/7] qmp: Add a command to list the named BlockDriverState nodes.
king device > # > +# @node-name: #optional the name of the block driver node (Since 2.0) > +# > # @ro: true if the backing device was open read-only > # > # @drv: the name of the block format used to open the backing device. As of > @@ -857,10 +859,9 @@ > # > # Since: 0.14.0 > # > -# Notes: This interface is only found in @BlockInfo. > ## > { 'type': 'BlockDeviceInfo', > - 'data': { 'file': 'str', 'ro': 'bool', 'drv': 'str', > + 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', > '*backing_file': 'str', 'backing_file_depth': 'int', > 'encrypted': 'bool', 'encryption_key_missing': 'bool', > 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', > @@ -2008,6 +2009,17 @@ > { 'command': 'drive-backup', 'data': 'DriveBackup' } > > ## > +# @query-named-block-nodes > +# > +# Get the named block driver list > +# > +# Returns: the list of BlockDeviceInfo > +# > +# Since 2.0 > +## > +{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } > + > +## > # @drive-mirror > # > # Start mirroring a block device's writes to a new destination. > diff --git a/qmp-commands.hx b/qmp-commands.hx > index fba15cd..d644fe9 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -3295,3 +3295,64 @@ Example (2): > <- { "return": {} } > > EQMP > + > +{ > +.name = "query-named-block-nodes", > +.args_type = "", > +.mhandler.cmd_new = qmp_marshal_input_query_named_block_nodes, > +}, > + > +SQMP > +@query-named-block-nodes > + > + > +Return a list of BlockDeviceInfo for each named block driver node How about "Return a list of BlockDeviceInfo for all the named block driver nodes" But nothing to stop adding Reviewed-by: Fam Zheng > + > +Example: > + > +-> { "execute": "query-named-block-nodes" } > +<- { "return": [ { "ro":false, > + "drv":"qcow2", > + "encrypted":false, > + "file":"disks/test.qcow2", > + "node-name": "my-node", > + "backing_file_depth":1, > + "bps":100, > + "bps_rd":0, > + "bps_wr":0, > + "iops":100, > + "iops_rd":0, > + "iops_wr":0, > + "bps_max": 800, > + "bps_rd_max": 0, > + "bps_wr_max": 0, > + "iops_max": 0, > + "iops_rd_max": 0, > + "iops_wr_max": 0, > + "iops_size": 0, > + "image":{ > + "filename":"disks/test.qcow2", > + "format":"qcow2", > + "virtual-size":2048000, > + "backing_file":"base.qcow2", > + "full-backing-filename":"disks/base.qcow2", > + "backing-filename-format:"qcow2", > + "snapshots":[ > + { > +"id": "1", > +"name": "snapshot1", > +"vm-state-size": 0, > +"date-sec": 1200, > +"date-nsec": 12, > +"vm-clock-sec": 206, > +"vm-clock-nsec": 30 > + } > + ], > + "backing-image":{ > + "filename":"disks/base.qcow2", > + "format":"qcow2", > + "virtual-size":2048000 > + } > + } } ] } > + > +EQMP > -- > 1.8.3.2 > >
Re: [Qemu-devel] [PATCH V5 4/7] qmp: Allow to change password on named block driver states.
qmp_block_passwd(true, device, false, NULL, password, &errp); > hmp_handle_error(mon, &errp); > } > > diff --git a/include/block/block.h b/include/block/block.h > index 8c10123..f7d8017 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -376,6 +376,9 @@ const char *bdrv_get_format_name(BlockDriverState *bs); > BlockDriverState *bdrv_find(const char *name); > BlockDriverState *bdrv_find_node(const char *node_name); > BlockDeviceInfoList *bdrv_named_nodes_list(void); > +BlockDriverState *bdrv_lookup_bs(bool has_device, const char *device, > + bool has_node_name, const char *node_name, > + Error **errp); > BlockDriverState *bdrv_next(BlockDriverState *bs); > void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), >void *opaque); > diff --git a/qapi-schema.json b/qapi-schema.json > index 0dadd5d..903fcb6 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -1676,7 +1676,11 @@ > # determine which ones are encrypted, set the passwords with this command, > and > # then start the guest with the @cont command. > # > -# @device: the name of the device to set the password on > +# Either @device or @node-name must be set but not both. > +# > +# @device: #optional the name of the block backend device to set the > password on > +# > +# @node-name: #optional graph node name to set the password on (Since 2.0) > # > # @password: the password to use for the device > # > @@ -1690,7 +1694,8 @@ > # > # Since: 0.14.0 > ## > -{ 'command': 'block_passwd', 'data': {'device': 'str', 'password': 'str'} } > +{ 'command': 'block_passwd', 'data': {'*device': 'str', > + '*node-name': 'str', 'password': > 'str'} } > > ## > # @balloon: > diff --git a/qmp-commands.hx b/qmp-commands.hx > index d644fe9..1451c1a 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -1452,7 +1452,7 @@ EQMP > > { > .name = "block_passwd", > -.args_type = "device:B,password:s", > +.args_type = "device:s?,node-name:s?,password:s", > .mhandler.cmd_new = qmp_marshal_input_block_passwd, > }, > > @@ -1465,6 +1465,7 @@ Set the password of encrypted block devices. > Arguments: > > - "device": device name (json-string) > +- "node-name": name in the block driver state graph (json-string) > - "password": password (json-string) > > Example: > -- > 1.8.3.2 > > Reviewed-by: Fam Zheng
Re: [Qemu-devel] [PATCH V5 5/7] block: Create authorizations mechanism for external snapshot and resize.
On Thu, 12/12 16:33, Benoît Canet wrote: > Signed-off-by: Benoit Canet > --- > block.c | 65 > --- > block/blkverify.c | 2 +- > blockdev.c| 2 +- > include/block/block.h | 20 +++ > include/block/block_int.h | 12 ++--- > 5 files changed, 77 insertions(+), 24 deletions(-) > > diff --git a/block.c b/block.c > index 22190a4..57946b7 100644 > --- a/block.c > +++ b/block.c > @@ -4992,21 +4992,68 @@ int bdrv_amend_options(BlockDriverState *bs, > QEMUOptionParameter *options) > return bs->drv->bdrv_amend_options(bs, options); > } > > -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs) > +/* Used to recurse on single child block filters. > + * Single child block filter will store their child in bs->file. > + */ > +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, > + BlockDriverState *candidate) > { > -if (bs->drv->bdrv_check_ext_snapshot) { > -return bs->drv->bdrv_check_ext_snapshot(bs); > +if (!bs->drv) { > +return false; > +} > + > +if (!bs->drv->authorizations[BS_IS_A_FILTER]) { > +if (bs == candidate) { > +return true; > +} else { > +return false; > +} > +} > + > +if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) { > +return false; > } > > -if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) > { > -return bs->file->drv->bdrv_check_ext_snapshot(bs); > +if (!bs->file) { > +return false; > +} > + > +return bdrv_recurse_is_first_non_filter(bs->file, candidate); > +} > + > +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, > + BlockDriverState *candidate) > +{ > +if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) { > +return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); > } > > -/* external snapshots are allowed by default */ > -return EXT_SNAPSHOT_ALLOWED; > +return bdrv_generic_is_first_non_filter(bs, candidate); > } > > -ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs) > +/* This function check if the candidate is the first non filter bs down it's s/check/checks/ > + * bs chain. Since we don't have pointers to parents it explore all bs chains > + * from the top. Some filters can choose not to pass down the recursion. > + */ > +bool bdrv_is_first_non_filter(BlockDriverState *candidate) > { > -return EXT_SNAPSHOT_FORBIDDEN; > +BlockDriverState *bs; > + > +/* walk down the bs forest recursively */ > +QTAILQ_FOREACH(bs, &bdrv_states, device_list) { > +bool perm; > + > +if (!bs->file) { > +continue; > +} > + > +perm = bdrv_recurse_is_first_non_filter(bs->file, candidate); > + > +/* candidate is the first non filter */ > +if (perm) { > +return true; > +} > +} > + > +return false; > } > diff --git a/block/blkverify.c b/block/blkverify.c > index 3c63528..853afa9 100644 > --- a/block/blkverify.c > +++ b/block/blkverify.c > @@ -417,7 +417,7 @@ static BlockDriver bdrv_blkverify = { > .bdrv_aio_writev= blkverify_aio_writev, > .bdrv_aio_flush = blkverify_aio_flush, > > -.bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden, > +.authorizations = { true, false }, > }; > > static void bdrv_blkverify_init(void) > diff --git a/blockdev.c b/blockdev.c > index 838df50..ebb8f48 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -1236,7 +1236,7 @@ static void > external_snapshot_prepare(BlkTransactionState *common, > } > } > > -if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) { > +if (!bdrv_is_first_non_filter(state->old_bs)) { > error_set(errp, QERR_FEATURE_DISABLED, "snapshot"); > return; > } > diff --git a/include/block/block.h b/include/block/block.h > index f7d8017..16812b0 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -283,16 +283,16 @@ int bdrv_amend_options(BlockDriverState *bs_new, > QEMUOptionParameter *options); > /* external snapshots */ > > typedef enum { > -EXT_SNAPSHOT_ALLOWED, > -EXT_SNAPSHOT_FORBIDDEN, > -} ExtSnapshotPerm; > - > -/* return EXT_SNAPSHOT_ALLOWED if external snapshot is allowed > - * return EXT_SNAPSHOT_FORBIDDEN if external snapshot is forbidden > - */ > -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs); > -/* helper used to forbid external snapshots like in blkverify */ > -ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs); > +BS_IS_A_FILTER, > +BS_FILTER_PASS_DOWN, > +BS_AUTHORIZATION_COUNT, > +} BsAuthorization; > + > +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, > + BlockDriverState
Re: [Qemu-devel] [PATCH V5 6/7] qmp: Allow block_resize to manipulate bs graph nodes.
On Thu, 12/12 16:33, Benoît Canet wrote: > Signed-off-by: Benoit Canet > --- > blockdev.c | 18 ++ > hmp.c| 2 +- > qapi-schema.json | 10 -- > qmp-commands.hx | 3 ++- > 4 files changed, 25 insertions(+), 8 deletions(-) > > diff --git a/blockdev.c b/blockdev.c > index ebb8f48..374d03d 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -1676,14 +1676,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, > QObject **ret_data) > return 0; > } > > -void qmp_block_resize(const char *device, int64_t size, Error **errp) > +void qmp_block_resize(bool has_device, const char *device, > + bool has_node_name, const char *node_name, > + int64_t size, Error **errp) > { > +Error *local_err = NULL; > BlockDriverState *bs; > int ret; > > -bs = bdrv_find(device); > -if (!bs) { > -error_set(errp, QERR_DEVICE_NOT_FOUND, device); > +bs = bdrv_lookup_bs(has_device, device, > +has_node_name, node_name, > +&local_err); > +if (error_is_set(&local_err)) { > +error_propagate(errp, local_err); > +return; > +} > + > +if (!bdrv_is_first_non_filter(bs)) { > +error_set(errp, QERR_FEATURE_DISABLED, "resize"); > return; > } > > diff --git a/hmp.c b/hmp.c > index 3820fbe..906ddb7 100644 > --- a/hmp.c > +++ b/hmp.c > @@ -892,7 +892,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) > int64_t size = qdict_get_int(qdict, "size"); > Error *errp = NULL; > > -qmp_block_resize(device, size, &errp); > +qmp_block_resize(true, device, false, NULL, size, &errp); > hmp_handle_error(mon, &errp); > } > > diff --git a/qapi-schema.json b/qapi-schema.json > index 903fcb6..3977619 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -1722,7 +1722,11 @@ > # > # Resize a block image while a guest is running. > # > -# @device: the name of the device to get the image resized > +# Either @device or @node-name must be set but not both. > +# > +# @device: #optional the name of the device to get the image resized > +# > +# @node-name: #optional graph node name to get the image resized (Since 2.0) > # > # @size: new image size in bytes > # > @@ -1731,7 +1735,9 @@ > # > # Since: 0.14.0 > ## > -{ 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }} > +{ 'command': 'block_resize', 'data': { '*device': 'str', > + '*node-name': 'str', > + 'size': 'int' }} > > ## > # @NewImageMode > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 1451c1a..5696b08 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -880,7 +880,7 @@ EQMP > > { > .name = "block_resize", > -.args_type = "device:B,size:o", > +.args_type = "device:s?,node-name:s?,size:o", > .mhandler.cmd_new = qmp_marshal_input_block_resize, > }, > > @@ -893,6 +893,7 @@ Resize a block image while a guest is running. > Arguments: > > - "device": the device's ID, must be unique (json-string) > +- "node-name": the node name in the block driver state graph (json-string) > - "size": new size > > Example: > -- > 1.8.3.2 > > Reviewed-by: Fam Zheng
Re: [Qemu-devel] [PATCH V5 7/7] qmp: Allow to take external snapshots on bs graphs node.
ns with an > * extended QMP command? */ > -ret = bdrv_open(state->new_bs, new_image_file, NULL, > +ret = bdrv_open(state->new_bs, new_image_file, options, > flags | BDRV_O_NO_BACKING, drv, &local_err); > if (ret != 0) { > error_propagate(errp, local_err); > } > + > +QDECREF(options); > } > > static void external_snapshot_commit(BlkTransactionState *common) > diff --git a/hmp.c b/hmp.c > index 906ddb7..47dcf0c 100644 > --- a/hmp.c > +++ b/hmp.c > @@ -971,7 +971,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) > } > > mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; > -qmp_blockdev_snapshot_sync(device, filename, !!format, format, > +qmp_blockdev_snapshot_sync(true, device, false, NULL, > + filename, false, NULL, > + !!format, format, > true, mode, &errp); > hmp_handle_error(mon, &errp); > } > diff --git a/qapi-schema.json b/qapi-schema.json > index 3977619..d7afb69 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -1759,18 +1759,25 @@ > ## > # @BlockdevSnapshot > # > -# @device: the name of the device to generate the snapshot from. > +# Either @device or @node-name must be set but not both. > +# > +# @device: #optional the name of the device to generate the snapshot from. > +# > +# @node-name: #optional graph node name to generate the snapshot from (Since > 2.0) > # > # @snapshot-file: the target of the new image. A new file will be created. > # > +# @snapshot-node-name: #optional the graph node name of the new image (Since > 2.0) > +# > # @format: #optional the format of the snapshot image, default is 'qcow2'. > # > # @mode: #optional whether and how QEMU should create a new image, default is > #'absolute-paths'. > ## > { 'type': 'BlockdevSnapshot', > - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', > -'*mode': 'NewImageMode' } } > + 'data': { '*device': 'str', '*node-name': 'str', > +'snapshot-file': 'str', '*snapshot-node-name': 'str', > +'*format': 'str', '*mode': 'NewImageMode' } } > > ## > # @BlockdevSnapshotInternal > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 5696b08..b62b0f5 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -1038,7 +1038,9 @@ actions array: > - "data": a dictionary. The contents depend on the value >of "type". When "type" is "blockdev-snapshot-sync": >- "device": device name to snapshot (json-string) > + - "node-name": graph node name to snapshot (json-string) >- "snapshot-file": name of new image file (json-string) > + - "snapshot-node-name": graph node name of the new snapshot > (json-string) >- "format": format of new image (json-string, optional) >- "mode": whether and how QEMU should create the snapshot file > (NewImageMode, optional, default "absolute-paths") > @@ -1053,6 +1055,11 @@ Example: > { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0", > "snapshot-file": > "/some/place/my-image", > "format": "qcow2" } }, > + { 'type': 'blockdev-snapshot-sync', 'data' : { "node-name": > "myfile", > + "snapshot-file": > "/some/place/my-image2", > + "snapshot-node-name": "node3432", > + "mode": "existing", > + "format": "qcow2" } }, > { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1", > "snapshot-file": > "/some/place/my-image2", > "mode": "existing", > @@ -1066,7 +1073,7 @@ EQMP > > { > .name = "blockdev-snapshot-sync", > -.args_type = "device:B,snapshot-file:s,format:s?,mode:s?", > +.args_type = > "device:s?,node-name:s?,snapshot-file:s,snapshot-node-name:s?,format:s?,mode:s?", > .mhandler.cmd_new = qmp_marshal_input_blockdev_snapshot_sync, > }, > > @@ -1083,7 +1090,9 @@ snapshot image, default is qcow2. > Arguments: > > - "device": device name to snapshot (json-string) > +- "node-name": graph node name to snapshot (json-string) > - "snapshot-file": name of new image file (json-string) > +- "snapshot-node-name": graph node name of the new snapshot (json-string) > - "mode": whether and how QEMU should create the snapshot file >(NewImageMode, optional, default "absolute-paths") > - "format": format of new image (json-string, optional) > -- > 1.8.3.2 > > Reviewed-by: Fam Zheng
Re: [Qemu-devel] [PATCH V5 0/7] Giving names to BlockDriverState graph nodes
On Thu, 12/12 16:33, Benoît Canet wrote: > v5: > block empty node names [Kevin] > factorize setting of node-name option [Kevin] > NULL terminate node_name on removal [Kevin] > make query-named-block-nodes return BlockDeviceInfo structure [Eric] > Change some doc in query-named-block-nodes [Eric] > Document the choice of the QMP API for node name [Eric] > Use the same authorization as snapshot on block resize [Kevin] > Rebase the series [Kevin] Looks mostly good to me. But the external snapshot permission protection interface looks a bit obsecuer to me. Let's wait for other reviewers' comments. Thanks, Fam > > Benoît Canet (7): > block: Add bs->node_name to hold the name of a bs node of the bs > graph. > block: Allow the user to define "node-name" option. > qmp: Add a command to list the named BlockDriverState nodes. > qmp: Allow to change password on named block driver states. > block: Create authorizations mechanism for external snapshot and > resize. > qmp: Allow block_resize to manipulate bs graph nodes. > qmp: Allow to take external snapshots on bs graphs node. > > block.c | 210 > +- > block/blkverify.c | 2 +- > block/qapi.c | 109 > blockdev.c| 93 > hmp.c | 8 +- > include/block/block.h | 23 +++-- > include/block/block_int.h | 21 - > include/block/qapi.h | 1 + > qapi-schema.json | 48 +-- > qmp-commands.hx | 78 - > 10 files changed, 471 insertions(+), 122 deletions(-) > > -- > 1.8.3.2 >
[Qemu-devel] [PATCH] vmdk: Check for overhead when opening
Report an error if file size is even smaller than metadata. Signed-off-by: Fam Zheng --- block/vmdk.c | 7 +++ tests/qemu-iotests/059 | 6 ++ tests/qemu-iotests/059.out | 5 + 3 files changed, 18 insertions(+) diff --git a/block/vmdk.c b/block/vmdk.c index c6b60b4..7b53d41 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -640,6 +640,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } +if (bdrv_getlength(file) < +le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) { +error_report("File truncated, expecting at least %lld bytes", +le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE); +return -EINVAL; +} + ret = vmdk_add_extent(bs, file, false, le64_to_cpu(header.capacity), le64_to_cpu(header.gd_offset) << 9, diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059 index 65bea1d..30671c0 100755 --- a/tests/qemu-iotests/059 +++ b/tests/qemu-iotests/059 @@ -95,6 +95,12 @@ EOF _img_info echo +echo "=== Testing truncated sparse ===" +IMGOPTS="subformat=monolithicSparse" _make_test_img 100G +truncate -s 10M $TEST_IMG +_img_info + +echo echo "=== Testing version 3 ===" _use_sample_img iotest-version3.vmdk.bz2 _img_info diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index 16ab7c6..7cc0e11 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -2043,6 +2043,11 @@ qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Invalid extent lines: RW 12582912 VMFS "dummy.IMGFMT" 1 +=== Testing truncated sparse === +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400 +qemu-img: File truncated, expecting at least 13172736 bytes +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Wrong medium type + === Testing version 3 === image: TEST_DIR/iotest-version3.IMGFMT file format: IMGFMT -- 1.8.5.3
[Qemu-devel] [PATCH v4 0/6] vmdk: zeroed-grain GTE support
Added support for zeroed-grain GTE to VMDK according to VMDK Spec 5.0[1]. [1] Virtual Disk Format 5.0 - VMware, http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk Changes since v3: - 5/6: Remove tmp - 6/6: Update L2 Cache Remove redundant assignment to m_data Simplify l2_offset assignment to m_data Fix m_data.offset endian. Changes since v2: - all: Added 5/6 (vmdk: store fields of VmdkMetaData in cpu endian) - 6/6: Avoid side-effect of vmdk_L2update. Change function comment to gtkdoc stype. Fix VMDK4_FLAG_ZG. Changes since v1: - all: fix From: field - 1/5: squash one line of ret code macro change from 2/5 - 2/5: change VMDK4_FLAG_ZG to VMDK4_FLAG_ZERO_GRAIN - 3/5: move BLOCK_OPT_ZEROED_GRAIN defination from block_int.h to vmdk.c - 5/5: fix metadata update issue, unit test with cases 033 034 Fam Zheng (6): vmdk: named return code. vmdk: add support for “zeroed‐grain” GTE vmdk: Add option to create zeroed-grain image vmdk: change magic number to macro vmdk: store fields of VmdkMetaData in cpu endian vmdk: add bdrv_co_write_zeroes block/vmdk.c | 208 +-- 1 file changed, 145 insertions(+), 63 deletions(-) -- 1.8.1.4
[Qemu-devel] [PATCH v4 6/6] vmdk: add bdrv_co_write_zeroes
Use special offset to write zeroes efficiently, when zeroed-grain GTE is available. If zero-write an allocated cluster, cluster is leaked because its offset pointer is overwritten by "0x1". Signed-off-by: Fam Zheng --- block/vmdk.c | 86 +++- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index d98f304..d6dee7f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -124,6 +124,7 @@ typedef struct VmdkMetaData { unsigned int l2_index; unsigned int l2_offset; int valid; +uint32_t *l2_cache_entry; } VmdkMetaData; typedef struct VmdkGrainMarker { @@ -835,6 +836,9 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) return VMDK_ERROR; } } +if (m_data->l2_cache_entry) { +*m_data->l2_cache_entry = m_data->offset; +} return VMDK_OK; } @@ -905,6 +909,14 @@ static int get_cluster_offset(BlockDriverState *bs, l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; *cluster_offset = le32_to_cpu(l2_table[l2_index]); +if (m_data) { +m_data->valid = 1; +m_data->l1_index = l1_index; +m_data->l2_index = l2_index; +m_data->offset = *cluster_offset; +m_data->l2_offset = l2_offset; +m_data->l2_cache_entry = &l2_table[l2_index]; +} if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { zeroed = true; } @@ -938,10 +950,6 @@ static int get_cluster_offset(BlockDriverState *bs, if (m_data) { m_data->offset = *cluster_offset; -m_data->l1_index = l1_index; -m_data->l2_index = l2_index; -m_data->l2_offset = l2_offset; -m_data->valid = 1; } } *cluster_offset <<= 9; @@ -1164,8 +1172,17 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, return ret; } +/** + * vmdk_write: + * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just + * + * Returns: error code with 0 for success. + */ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) + const uint8_t *buf, int nb_sectors, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; @@ -1211,7 +1228,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, &cluster_offset); } } -if (ret) { +if (ret == VMDK_ERROR) { return -EINVAL; } extent_begin_sector = extent->end_sector - extent->sectors; @@ -1221,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (n > nb_sectors) { n = nb_sectors; } - -ret = vmdk_write_extent(extent, -cluster_offset, index_in_cluster * 512, -buf, n, sector_num); -if (ret) { -return ret; -} -if (m_data.valid) { -/* update L2 tables */ -if (vmdk_L2update(extent, &m_data) == -1) { -return -EIO; +if (zeroed) { +/* Do zeroed write, buf is ignored */ +if (extent->has_zero_grain && +index_in_cluster == 0 && +n >= extent->cluster_sectors) { +n = extent->cluster_sectors; +if (!zero_dry_run) { +m_data.offset = VMDK_GTE_ZEROED; +/* update L2 tables */ +if (vmdk_L2update(extent, &m_data) != VMDK_OK) { +return -EIO; +} +} +} else { +return -ENOTSUP; +} +} else { +ret = vmdk_write_extent(extent, +cluster_offset, index_in_cluster * 512, +buf, n, sector_num); +if (ret) { +return ret; +} +if (m_data.valid) { +/* update L2 tables */ +if (vmdk_L2update(extent, &m_data) != VMDK_OK) { +return -EIO; +} } } nb_sectors -= n; @@ -1257,7 +1291,22 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); -ret = vmdk_write(bs, sector_num, buf, nb_sectors); +ret = vmdk_write(bs, sector_num, b
[Qemu-devel] [PATCH v4 5/6] vmdk: store fields of VmdkMetaData in cpu endian
Previously VmdkMetaData.offset is stored little endian while other fields are cpu endian. This changes offset to cpu endian and convert before writing to image. Signed-off-by: Fam Zheng --- block/vmdk.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 0463d3b..d98f304 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -813,14 +813,15 @@ static int get_whole_cluster(BlockDriverState *bs, static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) { +uint32_t offset; +QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); +offset = cpu_to_le32(m_data->offset); /* update L2 table */ if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), -&(m_data->offset), -sizeof(m_data->offset) -) < 0) { +&offset, sizeof(offset)) < 0) { return VMDK_ERROR; } /* update backup L2 table */ @@ -830,8 +831,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), -&(m_data->offset), sizeof(m_data->offset) -) < 0) { +&offset, sizeof(offset)) < 0) { return VMDK_ERROR; } } @@ -848,7 +848,7 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; -uint32_t min_count, *l2_table, tmp = 0; +uint32_t min_count, *l2_table; bool zeroed = false; if (m_data) { @@ -924,8 +924,7 @@ static int get_cluster_offset(BlockDriverState *bs, } *cluster_offset >>= 9; -tmp = cpu_to_le32(*cluster_offset); -l2_table[l2_index] = tmp; +l2_table[l2_index] = cpu_to_le32(*cluster_offset); /* First of all we write grain itself, to avoid race condition * that may to corrupt the image. @@ -938,7 +937,7 @@ static int get_cluster_offset(BlockDriverState *bs, } if (m_data) { -m_data->offset = tmp; +m_data->offset = *cluster_offset; m_data->l1_index = l1_index; m_data->l2_index = l2_index; m_data->l2_offset = l2_offset; -- 1.8.1.4