close_return_path_on_source() retrieves the migration error from the the QEMUFile '->to_dst_file' to know if a shutdown is required to exit the return-path thread. However, in migrate_fd_cleanup(), '->to_dst_file' is cleaned up before calling close_return_path_on_source() and the shutdown is never performed, leaving the source and destination waiting for an event to occur.
Cache the file error in a temporary variable and pass it to close_return_path_on_source() to avoid relying on '->to_dst_file'. Signed-off-by: Cédric Le Goater <c...@redhat.com> --- migration/migration.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index d5f705ceef4c925589aa49335969672c0d761fa2..2c3362235c7651c11d581f3c3639571f1f9636ef 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -100,7 +100,7 @@ static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); static void migrate_fd_cancel(MigrationState *s); -static bool close_return_path_on_source(MigrationState *s); +static bool close_return_path_on_source(MigrationState *s, int eror); static void migration_downtime_start(MigrationState *s) { @@ -1313,6 +1313,8 @@ void migrate_set_state(int *state, int old_state, int new_state) static void migrate_fd_cleanup(MigrationState *s) { + int file_error = 0; + g_free(s->hostname); s->hostname = NULL; json_writer_free(s->vmdesc); @@ -1333,6 +1335,7 @@ static void migrate_fd_cleanup(MigrationState *s) multifd_save_cleanup(); qemu_mutex_lock(&s->qemu_file_lock); + file_error = qemu_file_get_error(s->to_dst_file); tmp = s->to_dst_file; s->to_dst_file = NULL; qemu_mutex_unlock(&s->qemu_file_lock); @@ -1348,7 +1351,7 @@ static void migrate_fd_cleanup(MigrationState *s) * We already cleaned up to_dst_file, so errors from the return * path might be due to that, ignore them. */ - close_return_path_on_source(s); + close_return_path_on_source(s, file_error); assert(!migration_is_active(s)); @@ -2357,7 +2360,7 @@ static int open_return_path_on_source(MigrationState *ms) } /* Return true if error detected, or false otherwise */ -static bool close_return_path_on_source(MigrationState *ms) +static bool close_return_path_on_source(MigrationState *ms, int file_error) { if (!ms->rp_state.rp_thread_created) { return false; @@ -2372,8 +2375,7 @@ static bool close_return_path_on_source(MigrationState *ms) * cause it to unblock if it's stuck waiting for the destination. */ WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { - if (ms->to_dst_file && ms->rp_state.from_dst_file && - qemu_file_get_error(ms->to_dst_file)) { + if (file_error && ms->rp_state.from_dst_file) { qemu_file_shutdown(ms->rp_state.from_dst_file); } } @@ -2707,6 +2709,7 @@ static void migration_completion(MigrationState *s) { int ret = 0; int current_active_state = s->state; + int file_error = qemu_file_get_error(s->to_dst_file); if (s->state == MIGRATION_STATUS_ACTIVE) { ret = migration_completion_precopy(s, ¤t_active_state); @@ -2720,11 +2723,11 @@ static void migration_completion(MigrationState *s) goto fail; } - if (close_return_path_on_source(s)) { + if (close_return_path_on_source(s, file_error)) { goto fail; } - if (qemu_file_get_error(s->to_dst_file)) { + if (file_error) { trace_migration_completion_file_err(); goto fail; } @@ -2861,6 +2864,7 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; + int file_error; /* * Current channel is possibly broken. Release it. Note that this is @@ -2874,6 +2878,7 @@ static MigThrError postcopy_pause(MigrationState *s) assert(s->to_dst_file); migration_ioc_unregister_yank_from_file(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); + file_error = qemu_file_get_error(s->to_dst_file); file = s->to_dst_file; s->to_dst_file = NULL; qemu_mutex_unlock(&s->qemu_file_lock); @@ -2886,7 +2891,7 @@ static MigThrError postcopy_pause(MigrationState *s) * path and just wait for the thread to finish. It will be * re-created when we resume. */ - close_return_path_on_source(s); + close_return_path_on_source(s, file_error); migrate_set_state(&s->state, s->state, MIGRATION_STATUS_POSTCOPY_PAUSED); -- 2.43.0