On 3/15/24 13:20, Cédric Le Goater wrote:
On 3/15/24 12:01, Peter Xu wrote:
On Fri, Mar 15, 2024 at 11:17:45AM +0100, Cédric Le Goater wrote:
migrate_set_state is also unintuitive because it ignores invalid state
transitions and we've been using that property to deal with special
states such as POSTCOPY_PAUSED and FAILED:

- After the migration goes into POSTCOPY_PAUSED, the resumed migration's
    migrate_init() will try to set the state NONE->SETUP, which is not
    valid.

- After save_setup fails, the migration goes into FAILED, but wait_unplug
    will try to transition SETUP->ACTIVE, which is also not valid.


I am not sure I understand what the plan is. Both solutions are problematic
regarding the state transitions.

Should we consider that waiting for failover devices to unplug is an internal
step of the SETUP phase not transitioning to ACTIVE ?

If to unblock this series, IIUC the simplest solution is to do what Fabiano
suggested, that we move qemu_savevm_wait_unplug() to be before the check of
setup() ret.

The simplest is IMHO moving qemu_savevm_wait_unplug() before
qemu_savevm_state_setup() and leave patch 10 is unchanged. See
below the extra patch. It looks much cleaner than what we have
today.

In that case, the state change in qemu_savevm_wait_unplug()
should be benign and we should see a super small window it became ACTIVE
but then it should be FAILED (and IIUC the patch itself will need to use
ACTIVE as "old_state", not SETUP anymore).

OK. I will give it a try to compare.

Here's the alternative solution. SETUP state failures are handled after
transitioning to ACTIVE state, which is unfortunate but probably harmless.
I guess it's OK.

Thanks,

C.



Signed-off-by: Cédric Le Goater <c...@redhat.com>
---
 migration/savevm.h    |  2 +-
 migration/migration.c | 29 +++++++++++++++++++++++++++--
 migration/savevm.c    | 26 +++++++++++++++-----------
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/migration/savevm.h b/migration/savevm.h
index 
74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328
 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -32,7 +32,7 @@
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
 int qemu_savevm_state_prepare(Error **errp);
-void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
 bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_state_header(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index 
644e073b7dcc70cb2bdaa9c975ba478952465ff4..0704ad6226df61f2f15bd81a2897f9946d601ca7
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3427,6 +3427,8 @@ static void *migration_thread(void *opaque)
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     MigThrError thr_error;
     bool urgent = false;
+    Error *local_err = NULL;
+    int ret;
thread = migration_threads_add("live_migration", qemu_get_thread_id()); @@ -3470,12 +3472,24 @@ static void *migration_thread(void *opaque)
     }
bql_lock();
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
     bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                MIGRATION_STATUS_ACTIVE);
+ /*
+     * Handle SETUP failures after waiting for virtio-net-failover
+     * devices to unplug. This to preserve migration state transitions.
+     */
+    if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_FAILED);
+        goto out;
+    }
+
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
trace_migration_thread_setup_complete();
@@ -3549,6 +3563,8 @@ static void *bg_migration_thread(void *opaque)
     MigThrError thr_error;
     QEMUFile *fb;
     bool early_fail = true;
+    Error *local_err = NULL;
+    int ret;
rcu_register_thread();
     object_ref(OBJECT(s));
@@ -3582,12 +3598,20 @@ static void *bg_migration_thread(void *opaque)
bql_lock();
     qemu_savevm_state_header(s->to_dst_file);
-    qemu_savevm_state_setup(s->to_dst_file);
+    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
     bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                                MIGRATION_STATUS_ACTIVE);
+ if (ret) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_FAILED);
+        goto fail_setup;
+    }
+
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
trace_migration_thread_setup_complete();
@@ -3656,6 +3680,7 @@ fail:
         bql_unlock();
     }
+fail_setup:
     bg_migration_iteration_finish(s);
qemu_fclose(fb);
diff --git a/migration/savevm.c b/migration/savevm.c
index 
1a7b5cb78a912c36ae16db703afc90ef2906b61f..0eb94e61f888adba2c0732c2cb701b110814c455
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
     return 0;
 }
-void qemu_savevm_state_setup(QEMUFile *f)
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
 {
+    ERRP_GUARD();
     MigrationState *ms = migrate_get_current();
     SaveStateEntry *se;
-    Error *local_err = NULL;
     int ret = 0;
json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
@@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
     trace_savevm_state_setup();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->early_setup) {
-            ret = vmstate_save(f, se, ms->vmdesc, &local_err);
+            ret = vmstate_save(f, se, ms->vmdesc, errp);
             if (ret) {
-                migrate_set_error(ms, local_err);
-                error_report_err(local_err);
+                migrate_set_error(ms, *errp);
                 qemu_file_set_error(f, ret);
                 break;
             }
@@ -1346,18 +1345,19 @@ void qemu_savevm_state_setup(QEMUFile *f)
         ret = se->ops->save_setup(f, se->opaque);
         save_section_footer(f, se);
         if (ret < 0) {
+            error_setg(errp, "failed to setup SaveStateEntry with id(name): "
+                       "%d(%s): %d", se->section_id, se->idstr, ret);
             qemu_file_set_error(f, ret);
             break;
         }
     }
if (ret) {
-        return;
+        return ret;
     }
- if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
-        error_report_err(local_err);
-    }
+    /* TODO: Should we check that errp is set in case of failure ? */
+    return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
 }
int qemu_savevm_state_resume_prepare(MigrationState *s)
@@ -1725,7 +1725,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     ms->to_dst_file = f;
qemu_savevm_state_header(f);
-    qemu_savevm_state_setup(f);
+    ret = qemu_savevm_state_setup(f, errp);
+    if (ret) {
+        goto cleanup;
+    }
while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {
@@ -1738,10 +1741,11 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
         qemu_savevm_state_complete_precopy(f, false, false);
         ret = qemu_file_get_error(f);
     }
-    qemu_savevm_state_cleanup();
     if (ret != 0) {
         error_setg_errno(errp, -ret, "Error while writing VM state");
     }
+cleanup:
+    qemu_savevm_state_cleanup();
if (ret != 0) {
         status = MIGRATION_STATUS_FAILED;
--
2.44.0



Reply via email to