06.07.2021 19:11, Lukas Straub wrote:
The replication driver needs access to the children block-nodes of
it's child so it can issue bdrv_make_empty to manage the replication.
However, it does this by directly copying the BdrvChilds, which is
wrong.
Fix this by properly attaching the block-nodes with
bdrv_attach_child().
Also, remove a workaround introduced in commit
6ecbc6c52672db5c13805735ca02784879ce8285
"replication: Avoid blk_make_empty() on read-only child".
Signed-off-by: Lukas Straub <lukasstra...@web.de>
---
-v2: Test for BDRV_CHILD_PRIMARY in replication_child_perm, since
bs->file might not be set yet. (Vladimir)
block/replication.c | 94 +++++++++++++++++++++++++++++----------------
1 file changed, 61 insertions(+), 33 deletions(-)
diff --git a/block/replication.c b/block/replication.c
index 52163f2d1f..fd8cb728a3 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -166,7 +166,12 @@ static void replication_child_perm(BlockDriverState *bs,
BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
- *nperm = BLK_PERM_CONSISTENT_READ;
+ if (role & BDRV_CHILD_PRIMARY) {
+ *nperm = BLK_PERM_CONSISTENT_READ;
+ } else {
+ *nperm = 0;
+ }
Why you drop READ access for other children? You don't mention it in
commit-msg..
Upd: ok now I see that we are not going to read from hidden_disk child, and that's the
only "other" child that worth to mention.
Still, we should be sure that hidden_disk child gets WRITE permission in case
we are going to call bdrv_make_empty on it.
+
if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
*nperm |= BLK_PERM_WRITE;
}
@@ -340,17 +345,7 @@ static void secondary_do_checkpoint(BDRVReplicationState
*s, Error **errp)
return;
}
- BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
- BLK_PERM_WRITE, BLK_PERM_ALL);
- blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- blk_unref(blk);
- return;
- }
-
- ret = blk_make_empty(blk, errp);
- blk_unref(blk);
+ ret = bdrv_make_empty(s->hidden_disk, errp);
So, here you rely on BLK_PERM_WRITE being set in replication_child_perm()..
Probably that's OK, however logic is changed. Shouldn't we always require write
permission in replication_child_perm() for hidden_disk ?
if (ret < 0) {
return;
}
@@ -365,27 +360,35 @@ static void reopen_backing_file(BlockDriverState *bs,
bool writable,
Error **errp)
{
BDRVReplicationState *s = bs->opaque;
+ BdrvChild *hidden_disk, *secondary_disk;
BlockReopenQueue *reopen_queue = NULL;
+ /*
+ * s->hidden_disk and s->secondary_disk may not be set yet, as they will
+ * only be set after the children are writable.
+ */
+ hidden_disk = bs->file->bs->backing;
+ secondary_disk = hidden_disk->bs->backing;
+
if (writable) {
- s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
- s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
+ s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
+ s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
}
- bdrv_subtree_drained_begin(s->hidden_disk->bs);
- bdrv_subtree_drained_begin(s->secondary_disk->bs);
+ bdrv_subtree_drained_begin(hidden_disk->bs);
+ bdrv_subtree_drained_begin(secondary_disk->bs);
That kind of staff may be done as a separate preparation patch, with
no-logic-change refactoring, this makes the main logic-change patch simpler.
if (s->orig_hidden_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
- reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
+ reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
opts, true);
}
if (s->orig_secondary_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
- reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
+ reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
opts, true);
}, probably it could be a separate patch if it is needed.
@@ -393,8 +396,8 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
bdrv_reopen_multiple(reopen_queue, errp);
}
- bdrv_subtree_drained_end(s->hidden_disk->bs);
- bdrv_subtree_drained_end(s->secondary_disk->bs);
+ bdrv_subtree_drained_end(hidden_disk->bs);
+ bdrv_subtree_drained_end(secondary_disk->bs);
}
static void backup_job_cleanup(BlockDriverState *bs)
@@ -451,6 +454,7 @@ static void replication_start(ReplicationState *rs,
ReplicationMode mode,
BlockDriverState *bs = rs->opaque;
BDRVReplicationState *s;
BlockDriverState *top_bs;
+ BdrvChild *active_disk, *hidden_disk, *secondary_disk;
int64_t active_length, hidden_length, disk_length;
AioContext *aio_context;
Error *local_err = NULL;
@@ -488,32 +492,32 @@ static void replication_start(ReplicationState *rs,
ReplicationMode mode,
case REPLICATION_MODE_PRIMARY:
break;
case REPLICATION_MODE_SECONDARY:
- s->active_disk = bs->file;
- if (!s->active_disk || !s->active_disk->bs ||
- !s->active_disk->bs->backing) {
+ active_disk = bs->file;
+ if (!active_disk || !active_disk->bs ||
+ !active_disk->bs->backing) {
error_setg(errp, "Active disk doesn't have backing file");
aio_context_release(aio_context);
return;
}
- s->hidden_disk = s->active_disk->bs->backing;
- if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
+ hidden_disk = active_disk->bs->backing;
+ if (!hidden_disk->bs || !hidden_disk->bs->backing) {
error_setg(errp, "Hidden disk doesn't have backing file");
aio_context_release(aio_context);
return;
}
- s->secondary_disk = s->hidden_disk->bs->backing;
- if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
+ secondary_disk = hidden_disk->bs->backing;
+ if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
error_setg(errp, "The secondary disk doesn't have block backend");
aio_context_release(aio_context);
return;
}
, probably it could be a separate patch if it is needed.
/* verify the length */
- active_length = bdrv_getlength(s->active_disk->bs);
- hidden_length = bdrv_getlength(s->hidden_disk->bs);
- disk_length = bdrv_getlength(s->secondary_disk->bs);
+ active_length = bdrv_getlength(active_disk->bs);
+ hidden_length = bdrv_getlength(hidden_disk->bs);
+ disk_length = bdrv_getlength(secondary_disk->bs);
if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
active_length != hidden_length || hidden_length != disk_length) {
error_setg(errp, "Active disk, hidden disk, secondary disk's
length"
@@ -523,10 +527,10 @@ static void replication_start(ReplicationState *rs,
ReplicationMode mode,
}
/* Must be true, or the bdrv_getlength() calls would have failed */
- assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
+ assert(active_disk->bs->drv && hidden_disk->bs->drv);
- if (!s->active_disk->bs->drv->bdrv_make_empty ||
- !s->hidden_disk->bs->drv->bdrv_make_empty) {
+ if (!active_disk->bs->drv->bdrv_make_empty ||
+ !hidden_disk->bs->drv->bdrv_make_empty) {
error_setg(errp,
"Active disk or hidden disk doesn't support
make_empty");
aio_context_release(aio_context);
@@ -541,6 +545,28 @@ static void replication_start(ReplicationState *rs,
ReplicationMode mode,
return;
}
+ s->active_disk = active_disk;
+
+ bdrv_ref(hidden_disk->bs);
+ s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
+ &child_of_bds, BDRV_CHILD_DATA,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ aio_context_release(aio_context);
+ return;
+ }
Ok, the point of creating hidden_disk is to call bdrv_make_empty on it.
+
+ bdrv_ref(secondary_disk->bs);
+ s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
+ "secondary disk", &child_of_bds,
+ BDRV_CHILD_DATA, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ aio_context_release(aio_context);
+ return;
+ }
But s->secondary_disk child is actually unused.. No reason to create it.
+
/* start backup job now */
error_setg(&s->blocker,
"Block device is in use by internal backup job");
@@ -646,7 +672,9 @@ static void replication_done(void *opaque, int ret)
s->stage = BLOCK_REPLICATION_DONE;
s->active_disk = NULL;
+ bdrv_unref_child(bs, s->secondary_disk);
s->secondary_disk = NULL;
+ bdrv_unref_child(bs, s->hidden_disk);
s->hidden_disk = NULL;
s->error = 0;
} else {
For me it looks like the good way to update is:
1. drop s->active_disk. it seems to be just a copy of bs->file, better to use
bs->file directly, like other drivers do.
2. reduce usage of s->hidden_disk and s->secondary_disk, like you do in this
patch, using local variables instead. Also probably just drop s->secondary_disk..
3. introduce a child, to be used with bdrv_make_empty(s->hidden_disk)
And these are 3 separate patches. 1 and 2 may be merged, or instead 2 may be
divided into two (to refactor secondary_disk and hidden_disk separately)..
Still, I'm not a maintainer of replication, neither I have good understanding
of how it works, so don't take my advises to heart :)
--
Best regards,
Vladimir