On 9/18/25 10:06, Andrey Zhadchenko wrote:
dm-qcow2 is almost ready to merge intermediate images, thanks
to separated metadata tables and qio being per-qcow2.
So this patch mostly propagates which qcow2 we want to use
instead of tgt->top.
qcow2_merge_backward_complete is a bit harder due to in-middle
replacement and needing to recalculate img_id for a part of
image chain.
https://virtuozzo.atlassian.net/browse/VSTOR-101375
Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
---
v2:
- changed depth to u32
- reverted changes in set_backward_merge_in_process(): do queue stop
for not top qcow2 images at it's own level
- changed saving qcow2 in backward_merge to saving pointer to the
qcow2 which we later intend to change to lower delta
drivers/md/dm-qcow2-cmd.c | 58 ++++++++++++++++++++++++++++-----------
drivers/md/dm-qcow2.h | 1 +
2 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
index e77052ad61e6b..64bf282893e34 100644
--- a/drivers/md/dm-qcow2-cmd.c
+++ b/drivers/md/dm-qcow2-cmd.c
@@ -116,9 +116,9 @@ static int qcow2_service_iter(struct qcow2_target *tgt,
struct qcow2 *qcow2,
}
ALLOW_ERROR_INJECTION(qcow2_service_iter, ERRNO);
-static int qcow2_merge_common(struct qcow2_target *tgt)
+static int qcow2_merge_common(struct qcow2_target *tgt, struct qcow2 *qcow2)
{
- struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+ struct qcow2 *lower = qcow2->lower;
u32 clu_size = qcow2->clu_size;
loff_t end = lower->hdr.size;
@@ -139,9 +139,8 @@ static int qcow2_merge_forward(struct qcow2_target *tgt)
}
ALLOW_ERROR_INJECTION(qcow2_merge_forward, ERRNO);
-static int qcow2_break_l1cow(struct qcow2_target *tgt)
+static int qcow2_break_l1cow(struct qcow2_target *tgt, struct qcow2 *qcow2)
{
- struct qcow2 *qcow2 = tgt->top;
loff_t end = qcow2->hdr.size;
loff_t step = (u64)qcow2->l2_entries * qcow2->clu_size;
@@ -154,6 +153,15 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt,
{
LIST_HEAD(list);
+ /*
+ * There are no writes if it is not the top qcow2 image.
+ * so we do not need to stop and flush requests when setting
+ */
+ if (qcow2 != tgt->top && set) {
+ qcow2->backward_merge_in_process = set;
+ return;
+ }
+
/*
* To avoid race between allocations and COWS
* we completely stop queueing qios and wait
@@ -240,11 +248,17 @@ static int qcow2_merge_backward_progress(struct
qcow2_target *tgt,
static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);
-static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd, u32
depth)
{
- struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+ struct qcow2 *qcow2 = tgt->top, *lower, **pqcow2 = &tgt->top;
int ret;
+ while (depth-- && qcow2->lower) {
+ pqcow2 = &qcow2->lower;
+ qcow2 = qcow2->lower;
+ }
+ lower = qcow2->lower;
+
We don't really need double assignment in the loop, we can do with just
one by iterating singly over pqcow2 instead. We can derive qcow2 after
the loop. So I would suggest:
@@ -240,15 +248,23 @@ static int qcow2_merge_backward_progress(struct
qcow2_target *tgt,
static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt,
int efd);
-static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int
efd, u32 depth)
{
- struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+ struct qcow2 *qcow2, *lower, **pqcow2 = &tgt->top;
int ret;
lockdep_assert_held(&tgt->ctl_mutex);
+ while (depth-- && *pqcow2)
+ pqcow2 = &(*pqcow2)->lower;
+
+ qcow2 = *pqcow2;
+ if (!qcow2)
+ return -ENOENT;
+ lower = qcow2->lower;
if (!lower)
return -ENOENT;
+
if (!(lower->file->f_mode & FMODE_WRITE))
return -EACCES;
if (qcow2->clu_size != lower->clu_size)
lockdep_assert_held(&tgt->ctl_mutex);
We should probably check lockdep at the top of the function body, I
integrated the move to the top with the above diff.
Note: Functionally the patch looks good, feel free to add "Reviewed-by:
Pavel Tikhomirov <ptikhomi...@virtuozzo.com>".
if (!lower)
@@ -263,6 +277,7 @@ static int qcow2_merge_backward_start(struct qcow2_target
*tgt, int efd)
if (ret)
return ret;
+ tgt->backward_merge.pqcow2 = pqcow2;
tgt->backward_merge.state = BACKWARD_MERGE_START;
__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_START);
tgt->backward_merge.error = 0;
@@ -291,7 +306,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW);
mutex_unlock(&tgt->ctl_mutex);
- qcow2 = tgt->top;
+ qcow2 = *tgt->backward_merge.pqcow2;
lower = qcow2->lower;
/*
@@ -300,7 +315,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
* we'd have to freeze IO going to all data clusters
* under every L1 entry related to several snapshots.
*/
- ret = qcow2_break_l1cow(tgt);
+ ret = qcow2_break_l1cow(tgt, qcow2);
if (ret) {
QC_ERR(tgt->ti, "Can't break L1 COW");
goto out_err;
@@ -316,7 +331,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
/* Start merge */
backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING);
- ret = qcow2_merge_common(tgt);
+ ret = qcow2_merge_common(tgt, qcow2);
if (ret) {
set_backward_merge_in_process(tgt, qcow2, false);
ret2 = qcow2_set_image_file_features(lower, false);
@@ -350,16 +365,23 @@ void qcow2_merge_backward_work(struct work_struct *work)
static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
{
- struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+ struct qcow2 *qcow2 = *tgt->backward_merge.pqcow2, *i;
int ret;
lockdep_assert_held(&tgt->ctl_mutex);
if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
return -EBUSY;
+
+ *tgt->backward_merge.pqcow2 = qcow2->lower;
+
+ i = tgt->top;
+ while (i != qcow2->lower) {
+ i->img_id--;
+ i = i->lower;
+ }
+
__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING);
-
- tgt->top = lower;
smp_wmb(); /* Pairs with qcow2_ref_inc() */
qcow2_inflight_ref_switch(tgt); /* Pending qios */
qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
@@ -396,7 +418,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
} else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
flush = true;
} else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION)
{
- set_backward_merge_in_process(tgt, tgt->top, false);
+ set_backward_merge_in_process(tgt, *tgt->backward_merge.pqcow2,
false);
tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE);
}
@@ -571,7 +593,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc,
char **argv,
{
struct qcow2_target *tgt = to_qcow2_target(ti);
int ret = -EPERM;
- u32 val, val2;
+ u32 val, val2, depth = 0;
int efd;
if (!capable(CAP_SYS_ADMIN))
@@ -652,11 +674,15 @@ int qcow2_message(struct dm_target *ti, unsigned int
argc, char **argv,
} else if (!strcmp(argv[0], "merge_backward")) {
/* argc >= 2 */
if (!strcmp(argv[1], "start")) {
- if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd <
0) {
+ if (argc < 3 || argc > 4 || kstrtoint(argv[2], 10, &efd)
|| efd < 0) {
ret = -EINVAL;
goto out_unlock;
}
- ret = qcow2_merge_backward_start(tgt, efd);
+ if (argc == 4 && kstrtou32(argv[3], 10, &depth)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ ret = qcow2_merge_backward_start(tgt, efd, depth);
} else if (!strcmp(argv[1], "complete")) {
if (argc != 2) {
ret = -EINVAL;
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index 5aa00c6a5ebd5..c50b578fb8b50 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -172,6 +172,7 @@ enum qcow2_backward_merge_stage {
struct qcow2_backward_merge {
struct work_struct work;
+ struct qcow2 **pqcow2;
enum qcow2_backward_merge_state state;
int error;
struct eventfd_ctx *eventfd_ctx;
--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel