Peter Xu <[email protected]> writes:

> migrate_set_error() currently doesn't take ownership of the error being
> passed in.  It's not aligned with the error API and meanwhile it also
> makes most of the caller free the error explicitly.
>
> Change the API to take the ownership of the Error object instead.  When at
> it, remove the first parameter so it's friendly to g_clear_pointer().  It
> can be used whenever the caller wants to provide extra safety measure (or
> reuse the pointer) to reset the Error* pointer after stolen.
>
> Signed-off-by: Peter Xu <[email protected]>

Worth mentioning that this avoids Error object copies?

> ---
>  migration/migration.h            |  2 +-
>  migration/cpr-exec.c             |  4 +--
>  migration/migration.c            | 46 +++++++++++++++-----------------
>  migration/multifd-device-state.c |  5 +---
>  migration/multifd.c              | 19 +++++++------
>  migration/postcopy-ram.c         |  5 ++--
>  migration/ram.c                  |  4 +--
>  migration/savevm.c               | 15 ++++-------
>  8 files changed, 42 insertions(+), 58 deletions(-)
>
> diff --git a/migration/migration.h b/migration/migration.h
> index 213b33fe6e..df74f9b14f 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -525,7 +525,7 @@ void migration_incoming_process(void);
>  
>  bool  migration_has_all_channels(void);
>  
> -void migrate_set_error(MigrationState *s, const Error *error);
> +void migrate_error_propagate(Error *error);
>  bool migrate_has_error(MigrationState *s);
>  
>  void migration_connect(MigrationState *s, Error *error_in);
> diff --git a/migration/cpr-exec.c b/migration/cpr-exec.c
> index 0b8344a86f..13e6138f56 100644
> --- a/migration/cpr-exec.c
> +++ b/migration/cpr-exec.c
> @@ -158,9 +158,7 @@ static void cpr_exec_cb(void *opaque)
>  
>      error_report_err(error_copy(err));
>      migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> -    migrate_set_error(s, err);
> -    error_free(err);
> -    err = NULL;
> +    g_clear_pointer(&err, migrate_error_propagate);
>  
>      /* Note, we can go from state COMPLETED to FAILED */
>      migration_call_notifiers(s, MIG_EVENT_PRECOPY_FAILED, NULL);

I dislike g_clear_pointer(), and I dislike the change from taking the
migration state as argument to getting the global migration state with
migrate_get_current().  The loss of similarity to error_propagate() is
unfortunate, but tolerable.  This is not a demand.

For each hunk, we need to prove that the migrate_set_error()'s first
argument before the patch equals migrate_get_current() afterwards.

For this hunk, it's locally obvious: @s is initialized to
migrate_get_current() at the beginning of the funtion.

Where it's not locally obvious, I guess we could argue that just one
MigrationState object exists, so a MigrationState * can only point to
that one.

If locally non-obvious hunks exist, such an argument needs to be made in
the commit message.

I did *not* check this aspect of the patch.

> diff --git a/migration/migration.c b/migration/migration.c
> index 4fe69cc2ef..219d3129cb 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -914,9 +914,7 @@ process_incoming_migration_co(void *opaque)
>  fail:
>      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_FAILED);
> -    migrate_set_error(s, local_err);
> -    error_free(local_err);
> -
> +    migrate_error_propagate(local_err);
>      migration_incoming_state_destroy();
>  
>      if (mis->exit_on_error) {
> @@ -1548,14 +1546,22 @@ static void migration_cleanup_bh(void *opaque)
>      migration_cleanup(opaque);
>  }
>  
> -void migrate_set_error(MigrationState *s, const Error *error)
> +/*
> + * Propagate the Error* object to migration core.  The caller mustn't
> + * reference the error pointer after the function returned, because the
> + * Error* object might be freed.
> + */
> +void migrate_error_propagate(Error *error)
>  {
> -    QEMU_LOCK_GUARD(&s->error_mutex);
> +    MigrationState *s = migrate_get_current();
>  
> +    QEMU_LOCK_GUARD(&s->error_mutex);
>      trace_migrate_error(error_get_pretty(error));
>  
>      if (!s->error) {
> -        s->error = error_copy(error);
> +        s->error = error;
> +    } else {
> +        error_free(error);
>      }
>  }
>  
> @@ -1601,8 +1607,7 @@ static void migration_connect_set_error(MigrationState 
> *s, Error *error)
>      }
>  
>      migrate_set_state(&s->state, current, next);
> -    migrate_set_error(s, error);
> -    error_free(error);
> +    migrate_error_propagate(error);
>  }
>  
>  void migration_cancel(void)
> @@ -2014,8 +2019,7 @@ void qmp_migrate_pause(Error **errp)
>  
>          /* Tell the core migration that we're pausing */
>          error_setg(&error, "Postcopy migration is paused by the user");
> -        migrate_set_error(ms, error);
> -        error_free(error);
> +        migrate_error_propagate(error);
>  
>          qemu_mutex_lock(&ms->qemu_file_lock);
>          if (ms->to_dst_file) {
> @@ -2647,8 +2651,7 @@ static void *source_return_path_thread(void *opaque)
>  
>  out:
>      if (err) {
> -        migrate_set_error(ms, err);
> -        error_free(err);
> +        migrate_error_propagate(err);
>          trace_source_return_path_thread_bad_end();
>      }
>  
> @@ -3094,12 +3097,10 @@ static void migration_completion(MigrationState *s)
>  
>  fail:
>      if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) {
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>      } else if (ret) {
>          error_setg_errno(&local_err, -ret, "Error in migration completion");
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>      }
>  
>      if (s->state != MIGRATION_STATUS_CANCELLING) {
> @@ -3326,8 +3327,7 @@ static MigThrError 
> migration_detect_error(MigrationState *s)
>      }
>  
>      if (local_error) {
> -        migrate_set_error(s, local_error);
> -        error_free(local_error);
> +        migrate_error_propagate(local_error);
>      }
>  
>      if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
> @@ -3522,7 +3522,7 @@ static MigIterateState 
> migration_iteration_run(MigrationState *s)
>          if (must_precopy <= s->threshold_size &&
>              can_switchover && qatomic_read(&s->start_postcopy)) {
>              if (postcopy_start(s, &local_err)) {
> -                migrate_set_error(s, local_err);
> +                migrate_error_propagate(error_copy(local_err));

First hunk where we don't save a copy.  Reason: we report in addition to
propagate.  There are a several more below.

"Forking" the error like this gives me an uneasy feeling, as explained
in
    Subject: Re: [PATCH 0/3] migration: Error fixes and improvements
    Date: Tue, 18 Nov 2025 08:44:32 +0100
    Message-ID: <[email protected]>

Clearly out of scope for this series.

>                  error_report_err(local_err);
>              }
>              return MIG_ITERATE_SKIP;
> @@ -3819,8 +3819,7 @@ static void *migration_thread(void *opaque)
>       * devices to unplug. This to preserve migration state transitions.
>       */
>      if (ret) {
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>                            MIGRATION_STATUS_FAILED);
>          goto out;
> @@ -3944,8 +3943,7 @@ static void *bg_migration_thread(void *opaque)
>       * devices to unplug. This to preserve migration state transitions.
>       */
>      if (ret) {
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
>                            MIGRATION_STATUS_FAILED);
>          goto fail_setup;
> @@ -4127,7 +4125,7 @@ void migration_connect(MigrationState *s, Error 
> *error_in)
>      return;
>  
>  fail:
> -    migrate_set_error(s, local_err);
> +    migrate_error_propagate(error_copy(local_err));
>      if (s->state != MIGRATION_STATUS_CANCELLING) {
>          migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>      }
> diff --git a/migration/multifd-device-state.c 
> b/migration/multifd-device-state.c
> index db3239fef5..3040d70872 100644
> --- a/migration/multifd-device-state.c
> +++ b/migration/multifd-device-state.c
> @@ -143,8 +143,6 @@ static int multifd_device_state_save_thread(void *opaque)
>      Error *local_err = NULL;
>  
>      if (!data->hdlr(data, &local_err)) {
> -        MigrationState *s = migrate_get_current();
> -
>          /*
>           * Can't call abort_device_state_save_threads() here since new
>           * save threads could still be in process of being launched
> @@ -158,8 +156,7 @@ static int multifd_device_state_save_thread(void *opaque)
>           * In case of multiple save threads failing which thread error
>           * return we end setting is purely arbitrary.
>           */
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>      }
>  
>      return 0;
> diff --git a/migration/multifd.c b/migration/multifd.c
> index c861b4b557..99717b64e9 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -428,8 +428,9 @@ static void multifd_send_set_error(Error *err)
>  
>      if (err) {
>          MigrationState *s = migrate_get_current();
> -        migrate_set_error(s, err);
> -        error_free(err);
> +
> +        migrate_error_propagate(err);
> +
>          if (s->state == MIGRATION_STATUS_SETUP ||
>              s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
>              s->state == MIGRATION_STATUS_DEVICE ||
> @@ -588,8 +589,7 @@ void multifd_send_shutdown(void)
>          Error *local_err = NULL;
>  
>          if (!multifd_send_cleanup_channel(p, &local_err)) {
> -            migrate_set_error(migrate_get_current(), local_err);
> -            error_free(local_err);
> +            migrate_error_propagate(local_err);
>          }
>      }
>  
> @@ -962,8 +962,7 @@ bool multifd_send_setup(void)
>          p->write_flags = 0;
>  
>          if (!multifd_new_send_channel_create(p, &local_err)) {
> -            migrate_set_error(s, local_err);
> -            error_free(local_err);
> +            migrate_error_propagate(local_err);
>              ret = -1;
>          }
>      }
> @@ -987,8 +986,7 @@ bool multifd_send_setup(void)
>  
>          ret = multifd_send_state->ops->send_setup(p, &local_err);
>          if (ret) {
> -            migrate_set_error(s, local_err);
> -            error_free(local_err);
> +            migrate_error_propagate(local_err);
>              goto err;
>          }
>          assert(p->iov);
> @@ -1067,8 +1065,9 @@ static void multifd_recv_terminate_threads(Error *err)
>  
>      if (err) {
>          MigrationState *s = migrate_get_current();
> -        migrate_set_error(s, err);
> -        error_free(err);
> +
> +        migrate_error_propagate(err);
> +
>          if (s->state == MIGRATION_STATUS_SETUP ||
>              s->state == MIGRATION_STATUS_ACTIVE) {
>              migrate_set_state(&s->state, s->state,
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 7c9fe61041..856366072a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1927,8 +1927,7 @@ postcopy_preempt_send_channel_done(MigrationState *s,
>                                     QIOChannel *ioc, Error *local_err)
>  {
>      if (local_err) {
> -        migrate_set_error(s, local_err);
> -        error_free(local_err);
> +        migrate_error_propagate(local_err);
>      } else {
>          migration_ioc_register_yank(ioc);
>          s->postcopy_qemufile_src = qemu_file_new_output(ioc);
> @@ -2162,7 +2161,7 @@ static void *postcopy_listen_thread(void *opaque)
>               * exit depending on if postcopy-exit-on-error is true, but the
>               * migration cannot be recovered.
>               */
> -            migrate_set_error(migr, local_err);
> +            migrate_error_propagate(error_copy(local_err));
>              error_report_err(local_err);
>              migrate_set_state(&mis->state, mis->state, 
> MIGRATION_STATUS_FAILED);
>              goto out;
> diff --git a/migration/ram.c b/migration/ram.c
> index 29f016cb25..1d2a47526e 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -4730,9 +4730,7 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier 
> *n, void *host,
>           * Abort and indicate a proper reason.
>           */
>          error_setg(&err, "RAM block '%s' resized during precopy.", 
> rb->idstr);
> -        migrate_set_error(migrate_get_current(), err);
> -        error_free(err);
> -
> +        migrate_error_propagate(err);
>          migration_cancel();
>      }
>  
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 638e9b364f..ab9d1e579e 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1125,13 +1125,12 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
>  int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
>  {
>      uint32_t tmp;
> -    MigrationState *ms = migrate_get_current();
>      Error *local_err = NULL;
>  
>      if (len > MAX_VM_CMD_PACKAGED_SIZE) {
>          error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
>                       __func__, len);
> -        migrate_set_error(ms, local_err);
> +        migrate_error_propagate(error_copy(local_err));
>          error_report_err(local_err);
>          return -1;
>      }
> @@ -1373,7 +1372,7 @@ int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>          if (se->vmsd && se->vmsd->early_setup) {
>              ret = vmstate_save(f, se, vmdesc, errp);
>              if (ret) {
> -                migrate_set_error(ms, *errp);
> +                migrate_error_propagate(error_copy(*errp));

Here, we need to keep the copy because we additionally propagate to the
caller.

>                  qemu_file_set_error(f, ret);
>                  break;
>              }

[...]


Reply via email to