snapshot: move drain outside of read-locked bdrv_snapshot_delete()

Kevin Wolf Mon, 26 May 2025 02:11:20 -0700

Am 23.05.2025 um 20:12 hat Andrey Drobyshev geschrieben:
> On 5/20/25 1:29 PM, Fiona Ebner wrote:
> > This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED.
> > 
> > More granular draining is not trivially possible, because
> > bdrv_snapshot_delete() can recursively call itself.
> > 
> > The return value of bdrv_all_delete_snapshot() changes from -1 to
> > -errno propagated from failed sub-calls. This is fine for the existing
> > callers of bdrv_all_delete_snapshot().
> > 
> > Signed-off-by: Fiona Ebner <f.eb...@proxmox.com>
> > ---
> > 
> > Changes in v2:
> > * Use 'must be drained' instead of 'needs to be drained'.
> > * Use goto for cleanup/error handling in bdrv_all_delete_snapshot().
> > * Don't use atomics to access bs->quiesce_counter.
> > 
> >  block/snapshot.c | 26 +++++++++++++++-----------
> >  blockdev.c       | 25 +++++++++++++++++--------
> >  qemu-img.c       |  2 ++
> >  3 files changed, 34 insertions(+), 19 deletions(-)
> > 
> > diff --git a/block/snapshot.c b/block/snapshot.c
> > index 22567f1fb9..9f300a78bd 100644
> > --- a/block/snapshot.c
> > +++ b/block/snapshot.c
> > @@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
> >  
> >  /**
> >   * Delete an internal snapshot by @snapshot_id and @name.
> > - * @bs: block device used in the operation
> > + * @bs: block device used in the operation, must be drained
> >   * @snapshot_id: unique snapshot ID, or NULL
> >   * @name: snapshot name, or NULL
> >   * @errp: location to store error
> > @@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> >  
> >      GLOBAL_STATE_CODE();
> >  
> > +    assert(bs->quiesce_counter > 0);
> > +
> >      if (!drv) {
> >          error_setg(errp, "Device '%s' has no medium",
> >                     bdrv_get_device_name(bs));
> > @@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> >          return -EINVAL;
> >      }
> >  
> > -    /* drain all pending i/o before deleting snapshot */
> > -    bdrv_drained_begin(bs);
> > -
> >      if (drv->bdrv_snapshot_delete) {
> >          ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
> >      } else if (fallback_bs) {
> > @@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
> >          ret = -ENOTSUP;
> >      }
> >  
> > -    bdrv_drained_end(bs);
> >      return ret;
> >  }
> >  
> > @@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name,
> >      ERRP_GUARD();
> >      g_autoptr(GList) bdrvs = NULL;
> >      GList *iterbdrvs;
> > +    int ret = 0;
> >  
> >      GLOBAL_STATE_CODE();
> > -    GRAPH_RDLOCK_GUARD_MAINLOOP();
> >  
> > -    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) 
> > < 0) {
> > -        return -1;
> > +    bdrv_drain_all_begin();
> > +    bdrv_graph_rdlock_main_loop();
> > +
> > +    ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, 
> > errp);
> > +    if (ret < 0) {
> > +        goto out;
> >      }
> >  
> >      iterbdrvs = bdrvs;
> >      while (iterbdrvs) {
> >          BlockDriverState *bs = iterbdrvs->data;
> >          QEMUSnapshotInfo sn1, *snapshot = &sn1;
> > -        int ret = 0;
> >  
> >          if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
> >              bdrv_snapshot_find(bs, snapshot, name) >= 0)
> > @@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name,
> >          if (ret < 0) {
> >              error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
> >                            name, bdrv_get_device_or_node_name(bs));
> > -            return -1;
> > +            goto out;
> >          }
> >  
> >          iterbdrvs = iterbdrvs->next;
> >      }
> >  
> > -    return 0;
> > +out:
> > +    bdrv_graph_rdunlock_main_loop();
> > +    bdrv_drain_all_end();
> > +    return ret;
> >  }
> >  
> >  
> > diff --git a/blockdev.c b/blockdev.c
> > index 21443b4514..3982f9776b 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -1132,39 +1132,41 @@ SnapshotInfo 
> > *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> >      int ret;
> >  
> >      GLOBAL_STATE_CODE();
> > -    GRAPH_RDLOCK_GUARD_MAINLOOP();
> > +
> > +    bdrv_drain_all_begin();
> > +    bdrv_graph_rdlock_main_loop();
> >  
> >      bs = qmp_get_root_bs(device, errp);
> >      if (!bs) {
> > -        return NULL;
> > +        goto error;
> >      }
> >  
> >      if (!id && !name) {
> >          error_setg(errp, "Name or id must be provided");
> > -        return NULL;
> > +        goto error;
> >      }
> >  
> >      if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, 
> > errp)) {
> > -        return NULL;
> > +        goto error;
> >      }
> >  
> >      ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
> >      if (local_err) {
> >          error_propagate(errp, local_err);
> > -        return NULL;
> > +        goto error;
> >      }
> >      if (!ret) {
> >          error_setg(errp,
> >                     "Snapshot with id '%s' and name '%s' does not exist on "
> >                     "device '%s'",
> >                     STR_OR_NULL(id), STR_OR_NULL(name), device);
> > -        return NULL;
> > +        goto error;
> >      }
> >  
> >      bdrv_snapshot_delete(bs, id, name, &local_err);
> >      if (local_err) {
> >          error_propagate(errp, local_err);
> > -        return NULL;
> > +        goto error;
> >      }
> >  
> >      info = g_new0(SnapshotInfo, 1);
> > @@ -1180,6 +1182,9 @@ SnapshotInfo 
> > *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
> >          info->has_icount = true;
> >      }
> >  
> > +error:
> > +    bdrv_graph_rdunlock_main_loop();
> > +    bdrv_drain_all_end();
> >      return info;
> >  }
> >  
> > @@ -1295,12 +1300,14 @@ static void internal_snapshot_abort(void *opaque)
> >      Error *local_error = NULL;
> >  
> >      GLOBAL_STATE_CODE();
> > -    GRAPH_RDLOCK_GUARD_MAINLOOP();
> >  
> >      if (!state->created) {
> >          return;
> >      }
> >  
> > +    bdrv_drain_all_begin();
> > +    bdrv_graph_rdlock_main_loop();
> > +
> >      if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
> >          error_reportf_err(local_error,
> >                            "Failed to delete snapshot with id '%s' and "
> > @@ -1308,6 +1315,8 @@ static void internal_snapshot_abort(void *opaque)
> >                            sn->id_str, sn->name,
> >                            bdrv_get_device_name(bs));
> >      }
> > +    bdrv_graph_rdunlock_main_loop();
> > +    bdrv_drain_all_end();
> >  }
> >
> 
> Okay, I've got a very simple and naive question to ask.  We've got this
> pattern recurring throughout the series:
> 
> > GLOBAL_STATE_CODE();
> > bdrv_drain_all_begin();
> > bdrv_graph_rdlock_main_loop();
> > 
> > ...
> > 
> > bdrv_graph_rdunlock_main_loop();
> > bdrv_drain_all_end();
> 
> bdrv_graph_rdlock_main_loop() doesn't actually take any locks, it
> asserts that we're running in the main thread and not in a coroutine.
> bdrv_graph_rdunlock_main_loop() does the same.
> GRAPH_RDLOCK_GUARD_MAINLOOP() does both those calls, in the beginning of
> a function and when leaving its scope, so essentially it also just does
> assert(qemu_in_main_thread() && !qemu_in_coroutine()).
> 
> Therefore:
> 
> 1. Is there any real benefit from using those
> {rdlock/rdunlock}_main_loop() constructions, or they're here due to
> historical reasons only?


It's the price we pay for the compiler to verify our locking rules.

> 2. Would it hurt if we only leave GRAPH_RDLOCK_GUARD_MAINLOOP() in all
> such occurrences?  At least when it's obvious we can't get out of the
> main thread.  That would simply deliver us from performing same checks
> several times, similar to what's done in commit 22/24 ("block/io: remove
> duplicate GLOBAL_STATE_CODE() in bdrv_do_drained_end()").

Once bdrv_drain_all_begin() is marked GRAPH_UNLOCKED, calling it after
GRAPH_RDLOCK_GUARD_MAINLOOP() would be wrong according to TSA rules
(which don't know anything about this being only a fake lock) and the
build would fail.

Kevin

Re: [PATCH v2 03/24] block/snapshot: move drain outside of read-locked bdrv_snapshot_delete()

Reply via email to