On 10/9/24 4:58 PM, Denis V. Lunev wrote: > Recent QEMU changes around preallocate_set_perm mandates that it is not > possible to poll on aio_context inside this function anymore. Thus > truncate operation has been moved inside bottom half. This bottom half > is scheduled from preallocate_set_perm() and that is all. > > This approach proven to be problematic in a lot of places once > additional operations are executed over preallocate filter in > production. The code validates that permissions have been really changed > just after the call to the set operation. > > All permissions operations or block driver graph changes are performed > inside the quiscent state in terms of the block layer. This means that > there are no in-flight packets which is guaranteed by the passing > through bdrv_drain() section. > > The idea is that we should effectively disable preallocate filter inside > bdrv_drain() and unblock permission changes. This section is definitely > not on the hot path and additional single truncate operation will not > hurt. > > Unfortunately bdrv_drain_begin() callback according to the documentation > also disallow waiting inside. Thus original approach with the bottom > half is not changed. bdrv_drain_begin() schedules the operation and in > order to ensure that it has been really executed before completion of > the section increments the amount of in-flight requests. > > Signed-off-by: Denis V. Lunev <[email protected]> > CC: Andrey Drobyshev <[email protected]> > CC: Vladimir Sementsov-Ogievskiy <[email protected]> > CC: Kevin Wolf <[email protected]> > --- > block/preallocate.c | 38 ++++++++++++++++++++++++++++++++++---- > tests/qemu-iotests/298 | 6 ++++-- > 2 files changed, 38 insertions(+), 6 deletions(-) > > diff --git a/block/preallocate.c b/block/preallocate.c > index 1cf854966c..d78ef0b045 100644 > --- a/block/preallocate.c > +++ b/block/preallocate.c > @@ -78,6 +78,7 @@ typedef struct BDRVPreallocateState { > > /* Gives up the resize permission on children when parents don't need it > */ > QEMUBH *drop_resize_bh; > + bool drop_resize_armed; > } BDRVPreallocateState; > > static int preallocate_drop_resize(BlockDriverState *bs, Error **errp); > @@ -149,6 +150,7 @@ static int preallocate_open(BlockDriverState *bs, QDict > *options, int flags, > */ > s->file_end = s->zero_start = s->data_end = -EINVAL; > s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs); > + s->drop_resize_armed = false; > > ret = bdrv_open_file_child(NULL, options, "file", bs, errp); > if (ret < 0) { > @@ -200,7 +202,7 @@ static void preallocate_close(BlockDriverState *bs) > { > BDRVPreallocateState *s = bs->opaque; > > - qemu_bh_cancel(s->drop_resize_bh); > + assert(!s->drop_resize_armed); > qemu_bh_delete(s->drop_resize_bh); > > if (s->data_end >= 0) { > @@ -504,6 +506,8 @@ static int preallocate_drop_resize(BlockDriverState *bs, > Error **errp) > BDRVPreallocateState *s = bs->opaque; > int ret; > > + s->drop_resize_armed = false; > + > if (s->data_end < 0) { > return 0; > } > @@ -534,11 +538,15 @@ static int preallocate_drop_resize(BlockDriverState > *bs, Error **errp) > > static void preallocate_drop_resize_bh(void *opaque) > { > + BlockDriverState *bs = opaque; > + > /* > * In case of errors, we'll simply keep the exclusive lock on the image > * indefinitely. > */ > - preallocate_drop_resize(opaque, NULL); > + preallocate_drop_resize(bs, NULL); > + > + bdrv_dec_in_flight(bs); > } > > static void preallocate_set_perm(BlockDriverState *bs, > @@ -547,13 +555,13 @@ static void preallocate_set_perm(BlockDriverState *bs, > BDRVPreallocateState *s = bs->opaque; > > if (can_write_resize(perm)) { > - qemu_bh_cancel(s->drop_resize_bh); > if (s->data_end < 0) { > s->data_end = s->file_end = s->zero_start = > bs->file->bs->total_sectors * BDRV_SECTOR_SIZE; > } > } else { > - qemu_bh_schedule(s->drop_resize_bh); > + assert(!s->drop_resize_armed); > + assert(s->data_end < 0); > } > } > > @@ -592,6 +600,26 @@ static int preallocate_check_perm(BlockDriverState *bs, > uint64_t perm, > return 0; > } > > +static void preallocate_drain_begin(BlockDriverState *bs) > +{ > + BDRVPreallocateState *s = bs->opaque; > + > + if (s->data_end < 0) { > + return; > + } > + if (s->drop_resize_armed) { > + return; > + } > + if (s->data_end == s->file_end) { > + s->file_end = s->zero_start = s->data_end = -EINVAL; > + return; > + } > + > + s->drop_resize_armed = true; > + bdrv_inc_in_flight(bs); > + qemu_bh_schedule(s->drop_resize_bh); > +} > + > static BlockDriver bdrv_preallocate_filter = { > .format_name = "preallocate", > .instance_size = sizeof(BDRVPreallocateState), > @@ -600,6 +628,8 @@ static BlockDriver bdrv_preallocate_filter = { > .bdrv_open = preallocate_open, > .bdrv_close = preallocate_close, > > + .bdrv_drain_begin = preallocate_drain_begin, > + > .bdrv_reopen_prepare = preallocate_reopen_prepare, > .bdrv_reopen_commit = preallocate_reopen_commit, > .bdrv_reopen_abort = preallocate_reopen_abort, > diff --git a/tests/qemu-iotests/298 b/tests/qemu-iotests/298 > index 9e75ac6975..41f12685a7 100755 > --- a/tests/qemu-iotests/298 > +++ b/tests/qemu-iotests/298 > @@ -94,8 +94,10 @@ class TestPreallocateFilter(TestPreallocateBase): > self.assert_qmp(result, 'return', {}) > self.complete_and_wait() > > - # commit of new megabyte should trigger preallocation > - self.check_big() > + # commit of new megabyte should trigger preallocation, but drain > + # will make file smaller > + self.check_small() > + > > def test_reopen_opts(self): > result = self.vm.qmp('blockdev-reopen', options=[{
This patch doesn't seem to be applying cleanly to the current master branch
