* Peter Xu (pet...@redhat.com) wrote: > Introducing new migration state "postcopy-recover". If a migration > procedure is paused and the connection is rebuilt afterward > successfully, we'll switch the source VM state from "postcopy-paused" to > the new state "postcopy-recover", then we'll do the resume logic in the > migration thread (along with the return path thread). > > This patch only does the state switch on the source side. A follow-up > patch will handle the state switching on the destination side using the same > status bit. > > Signed-off-by: Peter Xu <pet...@redhat.com> > --- > migration/migration.c | 45 +++++++++++++++++++++++++++++++++++++++++---- > qapi-schema.json | 4 +++- > 2 files changed, 44 insertions(+), 5 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 64de0ee..3aabe11 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -495,6 +495,7 @@ static bool migration_is_setup_or_active(int state) > case MIGRATION_STATUS_ACTIVE: > case MIGRATION_STATUS_POSTCOPY_ACTIVE: > case MIGRATION_STATUS_POSTCOPY_PAUSED: > + case MIGRATION_STATUS_POSTCOPY_RECOVER: > case MIGRATION_STATUS_SETUP: > return true; > > @@ -571,6 +572,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) > case MIGRATION_STATUS_CANCELLING: > case MIGRATION_STATUS_POSTCOPY_ACTIVE: > case MIGRATION_STATUS_POSTCOPY_PAUSED: > + case MIGRATION_STATUS_POSTCOPY_RECOVER: > /* TODO add some postcopy stats */ > info->has_status = true; > info->has_total_time = true; > @@ -2018,6 +2020,13 @@ static bool postcopy_should_start(MigrationState *s) > return atomic_read(&s->start_postcopy) || s->start_postcopy_fast; > } > > +/* Return zero if success, or <0 for error */ > +static int postcopy_do_resume(MigrationState *s) > +{ > + /* TODO: do the resume logic */ > + return 0; > +} > + > /* > * We don't return until we are in a safe state to continue current > * postcopy migration. 
Returns true to continue the migration, or > @@ -2026,7 +2035,9 @@ static bool postcopy_should_start(MigrationState *s) > static bool postcopy_pause(MigrationState *s) > { > assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); > - migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, > + > +do_pause: > + migrate_set_state(&s->state, s->state, > MIGRATION_STATUS_POSTCOPY_PAUSED); > > /* Current channel is possibly broken. Release it. */ > @@ -2043,9 +2054,32 @@ static bool postcopy_pause(MigrationState *s) > qemu_sem_wait(&s->postcopy_pause_sem); > } > > - trace_postcopy_pause_continued(); > + if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { > + /* We were woken up by a recovery procedure. Give it a shot */ > > - return true; > + /* > + * Firstly, let's wake up the return path now, with a new > + * return path channel. > + */ > + qemu_sem_post(&s->postcopy_pause_rp_sem); > + > + /* Do the resume logic */ > + if (postcopy_do_resume(s) == 0) { > + /* Let's continue! */ > + trace_postcopy_pause_continued(); > + return true; > + } else { > + /* > + * Something wrong happened during the recovery, let's > + * pause again. Pause is always better than throwing data > + * away. > + */ > + goto do_pause;
You should be able to turn this around into a do {} while or similar rather than goto. Dave > + } > + } else { > + /* This is not right... Time to quit. */ > + return false; > + } > } > > /* Return true if we want to stop the migration, otherwise false. */ > @@ -2300,7 +2334,10 @@ void migrate_fd_connect(MigrationState *s) > } > > if (resume) { > - /* TODO: do the resume logic */ > + /* Wakeup the main migration thread to do the recovery */ > + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, > + MIGRATION_STATUS_POSTCOPY_RECOVER); > + qemu_sem_post(&s->postcopy_pause_sem); > return; > } > > diff --git a/qapi-schema.json b/qapi-schema.json > index 27b7c4c..10f1f60 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -669,6 +669,8 @@ > # > # @postcopy-paused: during postcopy but paused. (since 2.10) > # > +# @postcopy-recover: trying to recover from a paused postcopy. (since 2.11) > +# > # @completed: migration is finished. > # > # @failed: some error occurred during migration process. > @@ -682,7 +684,7 @@ > { 'enum': 'MigrationStatus', > 'data': [ 'none', 'setup', 'cancelling', 'cancelled', > 'active', 'postcopy-active', 'postcopy-paused', > - 'completed', 'failed', 'colo' ] } > + 'postcopy-recover', 'completed', 'failed', 'colo' ] } > > ## > # @MigrationInfo: > -- > 2.7.4 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK