Re: [PATCH] block/rbd: implement bdrv_co_block_status

2021-08-10 Thread Peter Lieven

Am 10.08.21 um 10:51 schrieb Stefano Garzarella:

On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote:

Please, can you add a description?
For example also describing what happens if RBD image does not support 
RBD_FEATURE_FAST_DIFF.



Sure.





Signed-off-by: Peter Lieven 
---
block/rbd.c | 119 
1 file changed, 119 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index dcf82b15b8..ef1eaa6af3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
    char *namespace;
    uint64_t image_size;
    uint64_t object_size;
+    uint64_t features;
} BDRVRBDState;

typedef struct RBDTask {
@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
    s->image_size = info.size;
    s->object_size = info.obj_size;

+    r = rbd_get_features(s->image, >features);
+    if (r < 0) {
+    error_setg_errno(errp, -r, "error getting image features from %s",
+ s->image_name);
+    rbd_close(s->image);
+    goto failed_open;

  ^
You can use `failed_post_open` label here, so you can avoid to call rbd_close().



Bad me, I developed this patch in a Qemu version where failed_post_open wasn't 
present...





+    }
+
    /* If we are using an rbd snapshot, we must be r/o, otherwise
 * leave as-is */
    if (s->snap != NULL) {
@@ -1259,6 +1268,115 @@ static ImageInfoSpecific 
*qemu_rbd_get_specific_info(BlockDriverState *bs,
    return spec_info;
}

+typedef struct rbd_diff_req {
+    uint64_t offs;
+    uint64_t bytes;
+    int exists;
+} rbd_diff_req;
+
+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
+   int exists, void *opaque)
+{
+    struct rbd_diff_req *req = opaque;
+
+    assert(req->offs + req->bytes <= offs);
+    assert(offs >= req->offs + req->bytes);


I think just one of the two asserts is enough, isn't that the same condition?



Right.





+
+    if (req->exists && offs > req->offs + req->bytes) {
+    /*
+ * we started in an allocated area and jumped over an unallocated area,
+ * req->bytes contains the length of the allocated area before the
+ * unallocated area. stop further processing.
+ */
+    return -9000;

 ^
What is this magical value?

Please add a macro (with a comment) and also use it below in other places.



Will add in V2.





+    }
+    if (req->exists && !exists) {
+    /*
+ * we started in an allocated area and reached a hole. req->bytes
+ * contains the length of the allocated area before the hole.
+ * stop further processing.
+ */
+    return -9000;
+    }
+    if (!req->exists && exists && offs > req->offs) {
+    /*
+ * we started in an unallocated area and hit the first allocated
+ * block. req->bytes must be set to the length of the unallocated area
+ * before the allocated area. stop further processing.
+ */
+    req->bytes = offs - req->offs;
+    return -9000;
+    }
+
+    /*
+ * assert that we catched all cases above and allocation state has not
+ * changed during callbacks.
+ */
+    assert(exists == req->exists || !req->bytes);
+    req->exists = exists;
+
+    /*
+ * assert that we either return an unallocated block or have got callbacks
+ * for all allocated blocks present.
+ */
+    assert(!req->exists || offs == req->offs + req->bytes);
+    req->bytes = offs + len - req->offs;
+
+    return 0;
+}
+
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t 
offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+    BDRVRBDState *s = bs->opaque;
+    int ret, r;
+    struct rbd_diff_req req = { .offs = offset };
+
+    assert(offset + bytes <= s->image_size);
+
+    /* default to all sectors allocated */
+    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+    if (map) {
+    *map = offset;
+    }
+    *pnum = bytes;
+
+    /* RBD image does not support fast-diff */
+    if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
+    goto out;
+    }
+
+    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+  qemu_rbd_co_block_status_cb, );
+    if (r < 0 && r != -9000) {
+    goto out;
+    }
+    assert(req.bytes <= bytes);
+    if (!req.exists) {
+    if (r == 0 && !req.bytes) {
+    /*
+ * rbd_diff_iterate2 does not invoke callbacks for unallocated 
areas
+ * except for the case where an overlay has a hole where the parent
+ * has not. This here catches the case where no callback was
+ * invoked at all.
+ */
+    req.bytes = bytes;
+  

Re: [PATCH] block/rbd: implement bdrv_co_block_status

2021-08-10 Thread Stefano Garzarella

On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote:

Please, can you add a description?
For example also describing what happens if RBD image does not support 
RBD_FEATURE_FAST_DIFF.



Signed-off-by: Peter Lieven 
---
block/rbd.c | 119 
1 file changed, 119 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index dcf82b15b8..ef1eaa6af3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
char *namespace;
uint64_t image_size;
uint64_t object_size;
+uint64_t features;
} BDRVRBDState;

typedef struct RBDTask {
@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
s->image_size = info.size;
s->object_size = info.obj_size;

+r = rbd_get_features(s->image, >features);
+if (r < 0) {
+error_setg_errno(errp, -r, "error getting image features from %s",
+ s->image_name);
+rbd_close(s->image);
+goto failed_open;

  ^
You can use `failed_post_open` label here, so you can avoid to call 
rbd_close().



+}
+
/* If we are using an rbd snapshot, we must be r/o, otherwise
 * leave as-is */
if (s->snap != NULL) {
@@ -1259,6 +1268,115 @@ static ImageInfoSpecific 
*qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}

+typedef struct rbd_diff_req {
+uint64_t offs;
+uint64_t bytes;
+int exists;
+} rbd_diff_req;
+
+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
+   int exists, void *opaque)
+{
+struct rbd_diff_req *req = opaque;
+
+assert(req->offs + req->bytes <= offs);
+assert(offs >= req->offs + req->bytes);


I think just one of the two asserts is enough, isn't that the same 
condition?



+
+if (req->exists && offs > req->offs + req->bytes) {
+/*
+ * we started in an allocated area and jumped over an unallocated area,
+ * req->bytes contains the length of the allocated area before the
+ * unallocated area. stop further processing.
+ */
+return -9000;

 ^
What is this magical value?

Please add a macro (with a comment) and also use it below in other 
places.



+}
+if (req->exists && !exists) {
+/*
+ * we started in an allocated area and reached a hole.  
req->bytes

+ * contains the length of the allocated area before the hole.
+ * stop further processing.
+ */
+return -9000;
+}
+if (!req->exists && exists && offs > req->offs) {
+/*
+ * we started in an unallocated area and hit the first allocated
+ * block. req->bytes must be set to the length of the unallocated area
+ * before the allocated area. stop further processing.
+ */
+req->bytes = offs - req->offs;
+return -9000;
+}
+
+/*
+ * assert that we catched all cases above and allocation state has not
+ * changed during callbacks.
+ */
+assert(exists == req->exists || !req->bytes);
+req->exists = exists;
+
+/*
+ * assert that we either return an unallocated block or have got callbacks
+ * for all allocated blocks present.
+ */
+assert(!req->exists || offs == req->offs + req->bytes);
+req->bytes = offs + len - req->offs;
+
+return 0;
+}
+
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t 
offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+BDRVRBDState *s = bs->opaque;
+int ret, r;
+struct rbd_diff_req req = { .offs = offset };
+
+assert(offset + bytes <= s->image_size);
+
+/* default to all sectors allocated */
+ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+if (map) {
+*map = offset;
+}
+*pnum = bytes;
+
+/* RBD image does not support fast-diff */
+if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
+goto out;
+}
+
+r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+  qemu_rbd_co_block_status_cb, );
+if (r < 0 && r != -9000) {
+goto out;
+}
+assert(req.bytes <= bytes);
+if (!req.exists) {
+if (r == 0 && !req.bytes) {
+/*
+ * rbd_diff_iterate2 does not invoke callbacks for 
unallocated areas
+ * except for the case where an overlay has a hole where 
the parent
+ * has not. This here catches the case where no callback 
was

+ * invoked at all.
+ */
+req.bytes = bytes;
+}
+ret &= ~BDRV_BLOCK_DATA;
+ret |= BDRV_BLOCK_ZERO;
+}
+*pnum = req.bytes;
+
+out:
+if (ret > 0 && 

[PATCH] block/rbd: implement bdrv_co_block_status

2021-08-09 Thread Peter Lieven
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 119 
 1 file changed, 119 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index dcf82b15b8..ef1eaa6af3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
 char *namespace;
 uint64_t image_size;
 uint64_t object_size;
+uint64_t features;
 } BDRVRBDState;
 
 typedef struct RBDTask {
@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->image_size = info.size;
 s->object_size = info.obj_size;
 
+r = rbd_get_features(s->image, >features);
+if (r < 0) {
+error_setg_errno(errp, -r, "error getting image features from %s",
+ s->image_name);
+rbd_close(s->image);
+goto failed_open;
+}
+
 /* If we are using an rbd snapshot, we must be r/o, otherwise
  * leave as-is */
 if (s->snap != NULL) {
@@ -1259,6 +1268,115 @@ static ImageInfoSpecific 
*qemu_rbd_get_specific_info(BlockDriverState *bs,
 return spec_info;
 }
 
+typedef struct rbd_diff_req {
+uint64_t offs;
+uint64_t bytes;
+int exists;
+} rbd_diff_req;
+
+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
+   int exists, void *opaque)
+{
+struct rbd_diff_req *req = opaque;
+
+assert(req->offs + req->bytes <= offs);
+assert(offs >= req->offs + req->bytes);
+
+if (req->exists && offs > req->offs + req->bytes) {
+/*
+ * we started in an allocated area and jumped over an unallocated area,
+ * req->bytes contains the length of the allocated area before the
+ * unallocated area. stop further processing.
+ */
+return -9000;
+}
+if (req->exists && !exists) {
+/*
+ * we started in an allocated area and reached a hole. req->bytes
+ * contains the length of the allocated area before the hole.
+ * stop further processing.
+ */
+return -9000;
+}
+if (!req->exists && exists && offs > req->offs) {
+/*
+ * we started in an unallocated area and hit the first allocated
+ * block. req->bytes must be set to the length of the unallocated area
+ * before the allocated area. stop further processing.
+ */
+req->bytes = offs - req->offs;
+return -9000;
+}
+
+/*
+ * assert that we catched all cases above and allocation state has not
+ * changed during callbacks.
+ */
+assert(exists == req->exists || !req->bytes);
+req->exists = exists;
+
+/*
+ * assert that we either return an unallocated block or have got callbacks
+ * for all allocated blocks present.
+ */
+assert(!req->exists || offs == req->offs + req->bytes);
+req->bytes = offs + len - req->offs;
+
+return 0;
+}
+
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t 
offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+BDRVRBDState *s = bs->opaque;
+int ret, r;
+struct rbd_diff_req req = { .offs = offset };
+
+assert(offset + bytes <= s->image_size);
+
+/* default to all sectors allocated */
+ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+if (map) {
+*map = offset;
+}
+*pnum = bytes;
+
+/* RBD image does not support fast-diff */
+if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
+goto out;
+}
+
+r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+  qemu_rbd_co_block_status_cb, );
+if (r < 0 && r != -9000) {
+goto out;
+}
+assert(req.bytes <= bytes);
+if (!req.exists) {
+if (r == 0 && !req.bytes) {
+/*
+ * rbd_diff_iterate2 does not invoke callbacks for unallocated 
areas
+ * except for the case where an overlay has a hole where the parent
+ * has not. This here catches the case where no callback was
+ * invoked at all.
+ */
+req.bytes = bytes;
+}
+ret &= ~BDRV_BLOCK_DATA;
+ret |= BDRV_BLOCK_ZERO;
+}
+*pnum = req.bytes;
+
+out:
+if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
+*file = bs;
+}
+return ret;
+}
+
 static int64_t qemu_rbd_getlength(BlockDriverState *bs)
 {
 BDRVRBDState *s = bs->opaque;
@@ -1494,6 +1612,7 @@ static BlockDriver bdrv_rbd = {
 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
 .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
 #endif
+.bdrv_co_block_status   = qemu_rbd_co_block_status,
 
 .bdrv_snapshot_create   = qemu_rbd_snap_create,
 .bdrv_snapshot_delete   =