Re: [PATCH 1/2] block/rbd: fix handling of holes in .bdrv_co_block_status

2022-01-12 Thread Peter Lieven


> Am 12.01.2022 um 22:06 schrieb Ilya Dryomov :
> 
> On Wed, Jan 12, 2022 at 9:39 PM Peter Lieven  wrote:
>> 
>>> Am 12.01.22 um 10:05 schrieb Ilya Dryomov:
>>> On Mon, Jan 10, 2022 at 12:42 PM Peter Lieven  wrote:
 the assumption that we can't hit a hole if we do not diff against a 
 snapshot was wrong.
 
 We can see a hole in an image if we diff against base if there exists an 
 older snapshot
 of the image and we have discarded blocks in the image where the snapshot 
 has data.
 
 Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
 Cc: qemu-sta...@nongnu.org
 Signed-off-by: Peter Lieven 
 ---
 block/rbd.c | 55 +
 1 file changed, 34 insertions(+), 21 deletions(-)
 
 diff --git a/block/rbd.c b/block/rbd.c
 index def96292e0..5e9dc91d81 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
 @@ -1279,13 +1279,24 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
 size_t len,
 RBDDiffIterateReq *req = opaque;
 
 assert(req->offs + req->bytes <= offs);
 -/*
 - * we do not diff against a snapshot so we should never receive a 
 callback
 - * for a hole.
 - */
 -assert(exists);
 
 -if (!req->exists && offs > req->offs) {
 +if (req->exists && offs > req->offs + req->bytes) {
 +/*
 + * we started in an allocated area and jumped over an unallocated 
 area,
 + * req->bytes contains the length of the allocated area before the
 + * unallocated area. stop further processing.
 + */
 +return QEMU_RBD_EXIT_DIFF_ITERATE2;
 +}
 +if (req->exists && !exists) {
 +/*
 + * we started in an allocated area and reached a hole. req->bytes
 + * contains the length of the allocated area before the hole.
 + * stop further processing.
 + */
 +return QEMU_RBD_EXIT_DIFF_ITERATE2;
 +}
 +if (!req->exists && exists && offs > req->offs) {
 /*
  * we started in an unallocated area and hit the first allocated
  * block. req->bytes must be set to the length of the unallocated 
 area
 @@ -1295,17 +1306,19 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
 size_t len,
 return QEMU_RBD_EXIT_DIFF_ITERATE2;
 }
 
 -if (req->exists && offs > req->offs + req->bytes) {
 -/*
 - * we started in an allocated area and jumped over an unallocated 
 area,
 - * req->bytes contains the length of the allocated area before the
 - * unallocated area. stop further processing.
 - */
 -return QEMU_RBD_EXIT_DIFF_ITERATE2;
 -}
 +/*
 + * assert that we caught all cases above and allocation state has not
 + * changed during callbacks.
 + */
 +assert(exists == req->exists || !req->bytes);
 +req->exists = exists;
 
 -req->bytes += len;
 -req->exists = true;
 +/*
 + * assert that we either return an unallocated block or have got 
 callbacks
 + * for all allocated blocks present.
 + */
 +assert(!req->exists || offs == req->offs + req->bytes);
 +req->bytes = offs + len - req->offs;
 
 return 0;
 }
 @@ -1354,13 +1367,13 @@ static int coroutine_fn 
 qemu_rbd_co_block_status(BlockDriverState *bs,
 }
 assert(req.bytes <= bytes);
 if (!req.exists) {
 -if (r == 0) {
 +if (r == 0 && !req.bytes) {
 /*
 - * rbd_diff_iterate2 does not invoke callbacks for unallocated
 - * areas. This here catches the case where no callback was
 - * invoked at all (req.bytes == 0).
 + * rbd_diff_iterate2 does not invoke callbacks for 
 unallocated areas
 + * except for the case where an overlay has a hole where the 
 parent
 + * or an older snapshot of the image has not. This here 
 catches the
 + * case where no callback was invoked at all.
  */
 -assert(req.bytes == 0);
 req.bytes = bytes;
 }
 status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
 --
 2.25.1
 
 
>>> Hi Peter,
>>> 
>>> Can we just skip these "holes" by replacing the existing assert with
>>> an if statement that would simply bail from the callback on !exists?
>>> 
>>> Just trying to keep the logic as simple as possible since as it turns
>>> out we get to contend with ages-old librbd bugs here...
>> 
>> 
>> I'm afraid I think this would not work. Consider qemu-img convert.
>> 
>> If we bail out we would immediately call get_block_status with the offset
>> 
>> where we stopped 

Re: [PATCH 1/2] block/rbd: fix handling of holes in .bdrv_co_block_status

2022-01-12 Thread Ilya Dryomov
On Wed, Jan 12, 2022 at 9:39 PM Peter Lieven  wrote:
>
> Am 12.01.22 um 10:05 schrieb Ilya Dryomov:
> > On Mon, Jan 10, 2022 at 12:42 PM Peter Lieven  wrote:
> >> the assumption that we can't hit a hole if we do not diff against a 
> >> snapshot was wrong.
> >>
> >> We can see a hole in an image if we diff against base if there exists an 
> >> older snapshot
> >> of the image and we have discarded blocks in the image where the snapshot 
> >> has data.
> >>
> >> Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
> >> Cc: qemu-sta...@nongnu.org
> >> Signed-off-by: Peter Lieven 
> >> ---
> >>  block/rbd.c | 55 +
> >>  1 file changed, 34 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index def96292e0..5e9dc91d81 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -1279,13 +1279,24 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
> >> size_t len,
> >>  RBDDiffIterateReq *req = opaque;
> >>
> >>  assert(req->offs + req->bytes <= offs);
> >> -/*
> >> - * we do not diff against a snapshot so we should never receive a 
> >> callback
> >> - * for a hole.
> >> - */
> >> -assert(exists);
> >>
> >> -if (!req->exists && offs > req->offs) {
> >> +if (req->exists && offs > req->offs + req->bytes) {
> >> +/*
> >> + * we started in an allocated area and jumped over an unallocated 
> >> area,
> >> + * req->bytes contains the length of the allocated area before the
> >> + * unallocated area. stop further processing.
> >> + */
> >> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> +}
> >> +if (req->exists && !exists) {
> >> +/*
> >> + * we started in an allocated area and reached a hole. req->bytes
> >> + * contains the length of the allocated area before the hole.
> >> + * stop further processing.
> >> + */
> >> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> +}
> >> +if (!req->exists && exists && offs > req->offs) {
> >>  /*
> >>   * we started in an unallocated area and hit the first allocated
> >>   * block. req->bytes must be set to the length of the unallocated 
> >> area
> >> @@ -1295,17 +1306,19 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
> >> size_t len,
> >>  return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >>  }
> >>
> >> -if (req->exists && offs > req->offs + req->bytes) {
> >> -/*
> >> - * we started in an allocated area and jumped over an unallocated 
> >> area,
> >> - * req->bytes contains the length of the allocated area before the
> >> - * unallocated area. stop further processing.
> >> - */
> >> -return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> -}
> >> +/*
> >> + * assert that we caught all cases above and allocation state has not
> >> + * changed during callbacks.
> >> + */
> >> +assert(exists == req->exists || !req->bytes);
> >> +req->exists = exists;
> >>
> >> -req->bytes += len;
> >> -req->exists = true;
> >> +/*
> >> + * assert that we either return an unallocated block or have got 
> >> callbacks
> >> + * for all allocated blocks present.
> >> + */
> >> +assert(!req->exists || offs == req->offs + req->bytes);
> >> +req->bytes = offs + len - req->offs;
> >>
> >>  return 0;
> >>  }
> >> @@ -1354,13 +1367,13 @@ static int coroutine_fn 
> >> qemu_rbd_co_block_status(BlockDriverState *bs,
> >>  }
> >>  assert(req.bytes <= bytes);
> >>  if (!req.exists) {
> >> -if (r == 0) {
> >> +if (r == 0 && !req.bytes) {
> >>  /*
> >> - * rbd_diff_iterate2 does not invoke callbacks for unallocated
> >> - * areas. This here catches the case where no callback was
> >> - * invoked at all (req.bytes == 0).
> >> + * rbd_diff_iterate2 does not invoke callbacks for 
> >> unallocated areas
> >> + * except for the case where an overlay has a hole where the 
> >> parent
> >> + * or an older snapshot of the image has not. This here 
> >> catches the
> >> + * case where no callback was invoked at all.
> >>   */
> >> -assert(req.bytes == 0);
> >>  req.bytes = bytes;
> >>  }
> >>  status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
> >> --
> >> 2.25.1
> >>
> >>
> > Hi Peter,
> >
> > Can we just skip these "holes" by replacing the existing assert with
> > an if statement that would simply bail from the callback on !exists?
> >
> > Just trying to keep the logic as simple as possible since as it turns
> > out we get to contend with ages-old librbd bugs here...
>
>
> I'm afraid I think this would not work. Consider qemu-img convert.
>
> If we bail out we would immediately call get_block_status with the offset
>
> where we stopped and hit the !exist again.

I'm suggesting bailing from the 

Re: [PATCH 1/2] block/rbd: fix handling of holes in .bdrv_co_block_status

2022-01-12 Thread Peter Lieven
Am 12.01.22 um 10:05 schrieb Ilya Dryomov:
> On Mon, Jan 10, 2022 at 12:42 PM Peter Lieven  wrote:
>> the assumption that we can't hit a hole if we do not diff against a snapshot 
>> was wrong.
>>
>> We can see a hole in an image if we diff against base if there exists an 
>> older snapshot
>> of the image and we have discarded blocks in the image where the snapshot 
>> has data.
>>
>> Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
>> Cc: qemu-sta...@nongnu.org
>> Signed-off-by: Peter Lieven 
>> ---
>>  block/rbd.c | 55 +
>>  1 file changed, 34 insertions(+), 21 deletions(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index def96292e0..5e9dc91d81 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -1279,13 +1279,24 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
>> size_t len,
>>  RBDDiffIterateReq *req = opaque;
>>
>>  assert(req->offs + req->bytes <= offs);
>> -/*
>> - * we do not diff against a snapshot so we should never receive a 
>> callback
>> - * for a hole.
>> - */
>> -assert(exists);
>>
>> -if (!req->exists && offs > req->offs) {
>> +if (req->exists && offs > req->offs + req->bytes) {
>> +/*
>> + * we started in an allocated area and jumped over an unallocated 
>> area,
>> + * req->bytes contains the length of the allocated area before the
>> + * unallocated area. stop further processing.
>> + */
>> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
>> +}
>> +if (req->exists && !exists) {
>> +/*
>> + * we started in an allocated area and reached a hole. req->bytes
>> + * contains the length of the allocated area before the hole.
>> + * stop further processing.
>> + */
>> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
>> +}
>> +if (!req->exists && exists && offs > req->offs) {
>>  /*
>>   * we started in an unallocated area and hit the first allocated
>>   * block. req->bytes must be set to the length of the unallocated 
>> area
>> @@ -1295,17 +1306,19 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
>> size_t len,
>>  return QEMU_RBD_EXIT_DIFF_ITERATE2;
>>  }
>>
>> -if (req->exists && offs > req->offs + req->bytes) {
>> -/*
>> - * we started in an allocated area and jumped over an unallocated 
>> area,
>> - * req->bytes contains the length of the allocated area before the
>> - * unallocated area. stop further processing.
>> - */
>> -return QEMU_RBD_EXIT_DIFF_ITERATE2;
>> -}
>> +/*
>> + * assert that we caught all cases above and allocation state has not
>> + * changed during callbacks.
>> + */
>> +assert(exists == req->exists || !req->bytes);
>> +req->exists = exists;
>>
>> -req->bytes += len;
>> -req->exists = true;
>> +/*
>> + * assert that we either return an unallocated block or have got 
>> callbacks
>> + * for all allocated blocks present.
>> + */
>> +assert(!req->exists || offs == req->offs + req->bytes);
>> +req->bytes = offs + len - req->offs;
>>
>>  return 0;
>>  }
>> @@ -1354,13 +1367,13 @@ static int coroutine_fn 
>> qemu_rbd_co_block_status(BlockDriverState *bs,
>>  }
>>  assert(req.bytes <= bytes);
>>  if (!req.exists) {
>> -if (r == 0) {
>> +if (r == 0 && !req.bytes) {
>>  /*
>> - * rbd_diff_iterate2 does not invoke callbacks for unallocated
>> - * areas. This here catches the case where no callback was
>> - * invoked at all (req.bytes == 0).
>> + * rbd_diff_iterate2 does not invoke callbacks for unallocated 
>> areas
>> + * except for the case where an overlay has a hole where the 
>> parent
>> + * or an older snapshot of the image has not. This here catches 
>> the
>> + * case where no callback was invoked at all.
>>   */
>> -assert(req.bytes == 0);
>>  req.bytes = bytes;
>>  }
>>  status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
>> --
>> 2.25.1
>>
>>
> Hi Peter,
>
> Can we just skip these "holes" by replacing the existing assert with
> an if statement that would simply bail from the callback on !exists?
>
> Just trying to keep the logic as simple as possible since as it turns
> out we get to contend with ages-old librbd bugs here...


I'm afraid I think this would not work. Consider qemu-img convert.

If we bail out we would immediately call get_block_status with the offset

where we stopped and hit the !exist again.


Peter




Re: [PATCH 1/2] block/rbd: fix handling of holes in .bdrv_co_block_status

2022-01-12 Thread Ilya Dryomov
On Mon, Jan 10, 2022 at 12:42 PM Peter Lieven  wrote:
>
> the assumption that we can't hit a hole if we do not diff against a snapshot 
> was wrong.
>
> We can see a hole in an image if we diff against base if there exists an 
> older snapshot
> of the image and we have discarded blocks in the image where the snapshot has 
> data.
>
> Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Peter Lieven 
> ---
>  block/rbd.c | 55 +
>  1 file changed, 34 insertions(+), 21 deletions(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index def96292e0..5e9dc91d81 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -1279,13 +1279,24 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
> size_t len,
>  RBDDiffIterateReq *req = opaque;
>
>  assert(req->offs + req->bytes <= offs);
> -/*
> - * we do not diff against a snapshot so we should never receive a 
> callback
> - * for a hole.
> - */
> -assert(exists);
>
> -if (!req->exists && offs > req->offs) {
> +if (req->exists && offs > req->offs + req->bytes) {
> +/*
> + * we started in an allocated area and jumped over an unallocated 
> area,
> + * req->bytes contains the length of the allocated area before the
> + * unallocated area. stop further processing.
> + */
> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> +}
> +if (req->exists && !exists) {
> +/*
> + * we started in an allocated area and reached a hole. req->bytes
> + * contains the length of the allocated area before the hole.
> + * stop further processing.
> + */
> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> +}
> +if (!req->exists && exists && offs > req->offs) {
>  /*
>   * we started in an unallocated area and hit the first allocated
>   * block. req->bytes must be set to the length of the unallocated 
> area
> @@ -1295,17 +1306,19 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
> size_t len,
>  return QEMU_RBD_EXIT_DIFF_ITERATE2;
>  }
>
> -if (req->exists && offs > req->offs + req->bytes) {
> -/*
> - * we started in an allocated area and jumped over an unallocated 
> area,
> - * req->bytes contains the length of the allocated area before the
> - * unallocated area. stop further processing.
> - */
> -return QEMU_RBD_EXIT_DIFF_ITERATE2;
> -}
> +/*
> + * assert that we caught all cases above and allocation state has not
> + * changed during callbacks.
> + */
> +assert(exists == req->exists || !req->bytes);
> +req->exists = exists;
>
> -req->bytes += len;
> -req->exists = true;
> +/*
> + * assert that we either return an unallocated block or have got 
> callbacks
> + * for all allocated blocks present.
> + */
> +assert(!req->exists || offs == req->offs + req->bytes);
> +req->bytes = offs + len - req->offs;
>
>  return 0;
>  }
> @@ -1354,13 +1367,13 @@ static int coroutine_fn 
> qemu_rbd_co_block_status(BlockDriverState *bs,
>  }
>  assert(req.bytes <= bytes);
>  if (!req.exists) {
> -if (r == 0) {
> +if (r == 0 && !req.bytes) {
>  /*
> - * rbd_diff_iterate2 does not invoke callbacks for unallocated
> - * areas. This here catches the case where no callback was
> - * invoked at all (req.bytes == 0).
> + * rbd_diff_iterate2 does not invoke callbacks for unallocated 
> areas
> + * except for the case where an overlay has a hole where the 
> parent
> + * or an older snapshot of the image has not. This here catches 
> the
> + * case where no callback was invoked at all.
>   */
> -assert(req.bytes == 0);
>  req.bytes = bytes;
>  }
>  status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
> --
> 2.25.1
>
>

Hi Peter,

Can we just skip these "holes" by replacing the existing assert with
an if statement that would simply bail from the callback on !exists?

Just trying to keep the logic as simple as possible since as it turns
out we get to contend with ages-old librbd bugs here...

Thanks,

Ilya



[PATCH 1/2] block/rbd: fix handling of holes in .bdrv_co_block_status

2022-01-10 Thread Peter Lieven
the assumption that we can't hit a hole if we do not diff against a snapshot 
was wrong.

We can see a hole in an image if we diff against base if there exists an older 
snapshot
of the image and we have discarded blocks in the image where the snapshot has 
data.

Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
Cc: qemu-sta...@nongnu.org
Signed-off-by: Peter Lieven 
---
 block/rbd.c | 55 +
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index def96292e0..5e9dc91d81 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1279,13 +1279,24 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
size_t len,
 RBDDiffIterateReq *req = opaque;
 
 assert(req->offs + req->bytes <= offs);
-/*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
-assert(exists);
 
-if (!req->exists && offs > req->offs) {
+if (req->exists && offs > req->offs + req->bytes) {
+/*
+ * we started in an allocated area and jumped over an unallocated area,
+ * req->bytes contains the length of the allocated area before the
+ * unallocated area. stop further processing.
+ */
+return QEMU_RBD_EXIT_DIFF_ITERATE2;
+}
+if (req->exists && !exists) {
+/*
+ * we started in an allocated area and reached a hole. req->bytes
+ * contains the length of the allocated area before the hole.
+ * stop further processing.
+ */
+return QEMU_RBD_EXIT_DIFF_ITERATE2;
+}
+if (!req->exists && exists && offs > req->offs) {
 /*
  * we started in an unallocated area and hit the first allocated
  * block. req->bytes must be set to the length of the unallocated area
@@ -1295,17 +1306,19 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, 
size_t len,
 return QEMU_RBD_EXIT_DIFF_ITERATE2;
 }
 
-if (req->exists && offs > req->offs + req->bytes) {
-/*
- * we started in an allocated area and jumped over an unallocated area,
- * req->bytes contains the length of the allocated area before the
- * unallocated area. stop further processing.
- */
-return QEMU_RBD_EXIT_DIFF_ITERATE2;
-}
+/*
+ * assert that we caught all cases above and allocation state has not
+ * changed during callbacks.
+ */
+assert(exists == req->exists || !req->bytes);
+req->exists = exists;
 
-req->bytes += len;
-req->exists = true;
+/*
+ * assert that we either return an unallocated block or have got callbacks
+ * for all allocated blocks present.
+ */
+assert(!req->exists || offs == req->offs + req->bytes);
+req->bytes = offs + len - req->offs;
 
 return 0;
 }
@@ -1354,13 +1367,13 @@ static int coroutine_fn 
qemu_rbd_co_block_status(BlockDriverState *bs,
 }
 assert(req.bytes <= bytes);
 if (!req.exists) {
-if (r == 0) {
+if (r == 0 && !req.bytes) {
 /*
- * rbd_diff_iterate2 does not invoke callbacks for unallocated
- * areas. This here catches the case where no callback was
- * invoked at all (req.bytes == 0).
+ * rbd_diff_iterate2 does not invoke callbacks for unallocated 
areas
+ * except for the case where an overlay has a hole where the parent
+ * or an older snapshot of the image has not. This here catches the
+ * case where no callback was invoked at all.
  */
-assert(req.bytes == 0);
 req.bytes = bytes;
 }
 status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
-- 
2.25.1