On Wed, Nov 02, 2016 at 01:50:55PM -0400, John Snow wrote: > Refactor backup_start as backup_job_create, which only creates the job, > but does not automatically start it. The old interface, 'backup_start', > is not kept in favor of limiting the number of nearly-identical interfaces > that would have to be edited to keep up with QAPI changes in the future. > > Callers that wish to synchronously start the backup_block_job can > instead just call block_job_start immediately after calling > backup_job_create. > > Transactions are updated to use the new interface, calling block_job_start > only during the .commit phase, which helps prevent race conditions where > jobs may finish before we even finish building the transaction. This may > happen, for instance, during empty block backup jobs. > > Reported-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> > Signed-off-by: John Snow <js...@redhat.com> > --- > block/backup.c | 26 ++++++++------- > block/replication.c | 12 ++++--- > blockdev.c | 83 > ++++++++++++++++++++++++++++++----------------- > include/block/block_int.h | 23 ++++++------- > 4 files changed, 87 insertions(+), 57 deletions(-) > > diff --git a/block/backup.c b/block/backup.c > index ae1b99a..ea38733 100644 > --- a/block/backup.c > +++ b/block/backup.c > @@ -543,7 +543,7 @@ static const BlockJobDriver backup_job_driver = { > .drain = backup_drain, > }; > > -void backup_start(const char *job_id, BlockDriverState *bs, > +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, > BlockDriverState *target, int64_t speed, > MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, > bool compress, > @@ -563,52 +563,52 @@ void backup_start(const char *job_id, BlockDriverState > *bs, > > if (bs == target) { > error_setg(errp, "Source and target cannot be the same"); > - return; > + return NULL; > } > > if (!bdrv_is_inserted(bs)) { > error_setg(errp, "Device is not inserted: %s", > bdrv_get_device_name(bs)); > - return; > + return NULL; > } > > if (!bdrv_is_inserted(target)) { > error_setg(errp, "Device is not inserted: %s", > bdrv_get_device_name(target)); > - return; > + return NULL; > } > > if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) { > error_setg(errp, "Compression is not supported for this drive %s", > bdrv_get_device_name(target)); > - return; > + return NULL; > } > > if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { > - return; > + return NULL; > } > > if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { > - return; > + return NULL; > } > > if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { > if (!sync_bitmap) { > error_setg(errp, "must provide a valid bitmap name for " > "\"incremental\" sync mode"); > - return; > + return NULL; > } > > /* Create a new bitmap, and freeze/disable this one. */ > if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) { > - return; > + return NULL; > } > } else if (sync_bitmap) { > error_setg(errp, > "a sync_bitmap was provided to backup_run, " > "but received an incompatible sync_mode (%s)", > MirrorSyncMode_lookup[sync_mode]); > - return; > + return NULL; > } > > len = bdrv_getlength(bs); > @@ -655,8 +655,8 @@ void backup_start(const char *job_id, BlockDriverState > *bs, > block_job_add_bdrv(&job->common, target); > job->common.len = len; > block_job_txn_add_job(txn, &job->common); > - block_job_start(&job->common); > - return; > + > + return &job->common; > > error: > if (sync_bitmap) { > @@ -666,4 +666,6 @@ void backup_start(const char *job_id, BlockDriverState > *bs, > backup_clean(&job->common); > block_job_unref(&job->common); > } > + > + return NULL; > } > diff --git a/block/replication.c b/block/replication.c > index d5e2b0f..729dd12 100644 > --- a/block/replication.c > +++ b/block/replication.c > @@ -421,6 +421,7 @@ static void replication_start(ReplicationState *rs, > ReplicationMode mode, > int64_t active_length, hidden_length, disk_length; > AioContext *aio_context; > Error *local_err = NULL; > + BlockJob *job; > > aio_context = bdrv_get_aio_context(bs); > aio_context_acquire(aio_context); > @@ -508,17 +509,18 @@ static void replication_start(ReplicationState *rs, > ReplicationMode mode, > bdrv_op_block_all(top_bs, s->blocker); > bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); > > - backup_start(NULL, s->secondary_disk->bs, s->hidden_disk->bs, 0, > - MIRROR_SYNC_MODE_NONE, NULL, false, > - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, > - BLOCK_JOB_INTERNAL, backup_job_completed, bs, > - NULL, &local_err); > + job = backup_job_create(NULL, s->secondary_disk->bs, > s->hidden_disk->bs, > + 0, MIRROR_SYNC_MODE_NONE, NULL, false, > + BLOCKDEV_ON_ERROR_REPORT, > + BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL, > + backup_job_completed, bs, NULL, &local_err); > if (local_err) { > error_propagate(errp, local_err); > backup_job_cleanup(bs); > aio_context_release(aio_context); > return; > } > + block_job_start(job); > break; > default: > aio_context_release(aio_context); > diff --git a/blockdev.c b/blockdev.c > index 102ca9f..871aa35 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -1811,7 +1811,7 @@ typedef struct DriveBackupState { > BlockJob *job; > } DriveBackupState; > > -static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, > +static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, > Error **errp);
It'd be nice to someday remove this forward declaration and just move the function up here. But I won't ask you to do it :) > > static void drive_backup_prepare(BlkActionState *common, Error **errp) > @@ -1835,23 +1835,27 @@ static void drive_backup_prepare(BlkActionState > *common, Error **errp) > bdrv_drained_begin(bs); > state->bs = bs; > > - do_drive_backup(backup, common->block_job_txn, &local_err); > + state->job = do_drive_backup(backup, common->block_job_txn, &local_err); > if (local_err) { > error_propagate(errp, local_err); > return; > } > +} > > - state->job = state->bs->job; > +static void drive_backup_commit(BlkActionState *common) > +{ > + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); > + if (state->job) { Kevin has a point on all the state->job checks in the _commit/_abort functions. I think the only way that could happen is if the _prepare() was not run for some reason. Maybe just make them all asserts()? With that change: Reviewed-by: Jeff Cody <jc...@redhat.com> > + block_job_start(state->job); > + } > } > > static void drive_backup_abort(BlkActionState *common) > { > DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); > - BlockDriverState *bs = state->bs; > > - /* Only cancel if it's the job we started */ > - if (bs && bs->job && bs->job == state->job) { > - block_job_cancel_sync(bs->job); > + if (state->job) { > + block_job_cancel_sync(state->job); > } > } > > @@ -1872,8 +1876,8 @@ typedef struct BlockdevBackupState { > AioContext *aio_context; > } BlockdevBackupState; > > -static void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, > - Error **errp); > +static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, > + Error **errp); > > static void blockdev_backup_prepare(BlkActionState *common, Error **errp) > { > @@ -1906,23 +1910,27 @@ static void blockdev_backup_prepare(BlkActionState > *common, Error **errp) > state->bs = bs; > bdrv_drained_begin(state->bs); > > - do_blockdev_backup(backup, common->block_job_txn, &local_err); > + state->job = do_blockdev_backup(backup, common->block_job_txn, > &local_err); > if (local_err) { > error_propagate(errp, local_err); > return; > } > +} > > - state->job = state->bs->job; > +static void blockdev_backup_commit(BlkActionState *common) > +{ > + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, > common); > + if (state->job) { > + block_job_start(state->job); > + } > } > > static void blockdev_backup_abort(BlkActionState *common) > { > BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, > common); > - BlockDriverState *bs = state->bs; > > - /* Only cancel if it's the job we started */ > - if (bs && bs->job && bs->job == state->job) { > - block_job_cancel_sync(bs->job); > + if (state->job) { > + block_job_cancel_sync(state->job); > } > } > > @@ -2072,12 +2080,14 @@ static const BlkActionOps actions[] = { > [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { > .instance_size = sizeof(DriveBackupState), > .prepare = drive_backup_prepare, > + .commit = drive_backup_commit, > .abort = drive_backup_abort, > .clean = drive_backup_clean, > }, > [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { > .instance_size = sizeof(BlockdevBackupState), > .prepare = blockdev_backup_prepare, > + .commit = blockdev_backup_commit, > .abort = blockdev_backup_abort, > .clean = blockdev_backup_clean, > }, > @@ -3106,11 +3116,13 @@ out: > aio_context_release(aio_context); > } > > -static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error > **errp) > +static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, > + Error **errp) > { > BlockDriverState *bs; > BlockDriverState *target_bs; > BlockDriverState *source = NULL; > + BlockJob *job = NULL; > BdrvDirtyBitmap *bmap = NULL; > AioContext *aio_context; > QDict *options = NULL; > @@ -3139,7 +3151,7 @@ static void do_drive_backup(DriveBackup *backup, > BlockJobTxn *txn, Error **errp) > > bs = qmp_get_root_bs(backup->device, errp); > if (!bs) { > - return; > + return NULL; > } > > aio_context = bdrv_get_aio_context(bs); > @@ -3213,10 +3225,10 @@ static void do_drive_backup(DriveBackup *backup, > BlockJobTxn *txn, Error **errp) > } > } > > - backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync, > - bmap, backup->compress, backup->on_source_error, > - backup->on_target_error, BLOCK_JOB_DEFAULT, > - NULL, NULL, txn, &local_err); > + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, > + backup->sync, bmap, backup->compress, > + backup->on_source_error, backup->on_target_error, > + BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err); > bdrv_unref(target_bs); > if (local_err != NULL) { > error_propagate(errp, local_err); > @@ -3225,11 +3237,17 @@ static void do_drive_backup(DriveBackup *backup, > BlockJobTxn *txn, Error **errp) > > out: > aio_context_release(aio_context); > + return job; > } > > void qmp_drive_backup(DriveBackup *arg, Error **errp) > { > - return do_drive_backup(arg, NULL, errp); > + > + BlockJob *job; > + job = do_drive_backup(arg, NULL, errp); > + if (job) { > + block_job_start(job); > + } > } > > BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) > @@ -3237,12 +3255,14 @@ BlockDeviceInfoList > *qmp_query_named_block_nodes(Error **errp) > return bdrv_named_nodes_list(errp); > } > > -void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error > **errp) > +BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, > + Error **errp) > { > BlockDriverState *bs; > BlockDriverState *target_bs; > Error *local_err = NULL; > AioContext *aio_context; > + BlockJob *job = NULL; > > if (!backup->has_speed) { > backup->speed = 0; > @@ -3262,7 +3282,7 @@ void do_blockdev_backup(BlockdevBackup *backup, > BlockJobTxn *txn, Error **errp) > > bs = qmp_get_root_bs(backup->device, errp); > if (!bs) { > - return; > + return NULL; > } > > aio_context = bdrv_get_aio_context(bs); > @@ -3284,20 +3304,25 @@ void do_blockdev_backup(BlockdevBackup *backup, > BlockJobTxn *txn, Error **errp) > goto out; > } > } > - backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync, > - NULL, backup->compress, backup->on_source_error, > - backup->on_target_error, BLOCK_JOB_DEFAULT, > - NULL, NULL, txn, &local_err); > + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, > + backup->sync, NULL, backup->compress, > + backup->on_source_error, backup->on_target_error, > + BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err); > if (local_err != NULL) { > error_propagate(errp, local_err); > } > out: > aio_context_release(aio_context); > + return job; > } > > void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) > { > - do_blockdev_backup(arg, NULL, errp); > + BlockJob *job; > + job = do_blockdev_backup(arg, NULL, errp); > + if (job) { > + block_job_start(job); > + } > } > > /* Parameter check and block job starting for drive mirroring. > diff --git a/include/block/block_int.h b/include/block/block_int.h > index b02abbd..83a423c 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -748,7 +748,7 @@ void mirror_start(const char *job_id, BlockDriverState > *bs, > bool unmap, Error **errp); > > /* > - * backup_start: > + * backup_job_create: > * @job_id: The id of the newly-created job, or %NULL to use the > * device name of @bs. > * @bs: Block device to operate on. > @@ -764,18 +764,19 @@ void mirror_start(const char *job_id, BlockDriverState > *bs, > * @opaque: Opaque pointer value passed to @cb. > * @txn: Transaction that this job is part of (may be NULL). > * > - * Start a backup operation on @bs. Clusters in @bs are written to @target > + * Create a backup operation on @bs. Clusters in @bs are written to @target > * until the job is cancelled or manually completed. > */ > -void backup_start(const char *job_id, BlockDriverState *bs, > - BlockDriverState *target, int64_t speed, > - MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, > - bool compress, > - BlockdevOnError on_source_error, > - BlockdevOnError on_target_error, > - int creation_flags, > - BlockCompletionFunc *cb, void *opaque, > - BlockJobTxn *txn, Error **errp); > +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, > + BlockDriverState *target, int64_t speed, > + MirrorSyncMode sync_mode, > + BdrvDirtyBitmap *sync_bitmap, > + bool compress, > + BlockdevOnError on_source_error, > + BlockdevOnError on_target_error, > + int creation_flags, > + BlockCompletionFunc *cb, void *opaque, > + BlockJobTxn *txn, Error **errp); > > void hmp_drive_add_node(Monitor *mon, const char *optstr); > > -- > 2.7.4 >