[RFC v2 1/1] migration: Update error description whenever migration fails

2023-05-08 Thread tejus.gk
There are places in the code where the migration is marked failed with
MIGRATION_STATUS_FAILED, but the failiure reason is never updated. Hence
libvirt doesn't know why the migration failed when it queries for it.

Signed-off-by: tejus.gk 
---
 migration/migration.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 232e387109..87101eed5c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1660,15 +1660,9 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 } else if (strstart(uri, "fd:", )) {
 fd_start_outgoing_migration(s, p, _err);
 } else {
-if (!(has_resume && resume)) {
-yank_unregister_instance(MIGRATION_YANK_INSTANCE);
-}
-error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
+error_setg(_err, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
-migrate_set_state(>state, MIGRATION_STATUS_SETUP,
-  MIGRATION_STATUS_FAILED);
 block_cleanup_parameters();
-return;
 }
 
 if (local_err) {
@@ -2050,7 +2044,7 @@ migration_wait_main_channel(MigrationState *ms)
  * Switch from normal iteration to postcopy
  * Returns non-0 on error
  */
-static int postcopy_start(MigrationState *ms)
+static int postcopy_start(MigrationState *ms, Error **errp)
 {
 int ret;
 QIOChannelBuffer *bioc;
@@ -2165,7 +2159,7 @@ static int postcopy_start(MigrationState *ms)
  */
 ret = qemu_file_get_error(ms->to_dst_file);
 if (ret) {
-error_report("postcopy_start: Migration stream errored (pre package)");
+error_setg(errp, "postcopy_start: Migration stream errored (pre 
package)");
 goto fail_closefb;
 }
 
@@ -2202,7 +2196,7 @@ static int postcopy_start(MigrationState *ms)
 
 ret = qemu_file_get_error(ms->to_dst_file);
 if (ret) {
-error_report("postcopy_start: Migration stream errored");
+error_setg(errp, "postcopy_start: Migration stream errored");
 migrate_set_state(>state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
   MIGRATION_STATUS_FAILED);
 }
@@ -2719,6 +2713,7 @@ typedef enum {
 static MigIterateState migration_iteration_run(MigrationState *s)
 {
 uint64_t must_precopy, can_postcopy;
+Error *local_err = NULL;
 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
 
 qemu_savevm_state_pending_estimate(_precopy, _postcopy);
@@ -2741,8 +2736,9 @@ static MigIterateState 
migration_iteration_run(MigrationState *s)
 /* Still a significant amount to transfer */
 if (!in_postcopy && must_precopy <= s->threshold_size &&
 qatomic_read(>start_postcopy)) {
-if (postcopy_start(s)) {
-error_report("%s: postcopy failed to start", __func__);
+if (postcopy_start(s, _err)) {
+migrate_set_error(s, local_err);
+error_report_err(local_err);
 }
 return MIG_ITERATE_SKIP;
 }
@@ -3232,8 +3228,10 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
  */
 if (migrate_postcopy_ram() || migrate_return_path()) {
 if (open_return_path_on_source(s, !resume)) {
-error_report("Unable to open return-path for postcopy");
+error_setg(_err, "Unable to open return-path for postcopy");
 migrate_set_state(>state, s->state, MIGRATION_STATUS_FAILED);
+migrate_set_error(s, local_err);
+error_report_err(local_err);
 migrate_fd_cleanup(s);
 return;
 }
-- 
2.22.3




[RFC v2 0/1] migration: Update error description whenever migration fails

2023-05-08 Thread tejus.gk
Hi everyone, 

Thanks for the reviews. This is the v2 patchset based on the reviews 
recieved for the previous one. 

Links to previous patchsets:
v1: https://lists.gnu.org/archive/html/qemu-devel/2023-05/msg00868.html


tejus.gk (1):
  migration: Update error description whenever migration fails

 migration/migration.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

-- 
2.22.3




[RFC 1/1] migration: Update error description whenever migration fails

2023-05-03 Thread tejus.gk
There are places in the code where the migration is marked failed with
MIGRATION_STATUS_FAILED, but the failiure reason is never updated. Hence
libvirt doesn't know why the migration failed when it queries for it.

Signed-off-by: tejus.gk 
---
 migration/migration.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index feb5ab7493..0d7d34bf4d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1665,8 +1665,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 }
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
+error_setg(_err, QERR_INVALID_PARAMETER_VALUE, "uri",
+   "a valid migration protocol");
 migrate_set_state(>state, MIGRATION_STATUS_SETUP,
   MIGRATION_STATUS_FAILED);
+migrate_set_error(s, local_err);
 block_cleanup_parameters();
 return;
 }
@@ -2059,6 +2062,7 @@ static int postcopy_start(MigrationState *ms)
 int64_t bandwidth = migrate_max_postcopy_bandwidth();
 bool restart_block = false;
 int cur_state = MIGRATION_STATUS_ACTIVE;
+Error *local_err = NULL;
 
 if (migrate_postcopy_preempt()) {
 migration_wait_main_channel(ms);
@@ -2203,8 +2207,10 @@ static int postcopy_start(MigrationState *ms)
 ret = qemu_file_get_error(ms->to_dst_file);
 if (ret) {
 error_report("postcopy_start: Migration stream errored");
+error_setg(_err, "postcopy_start: Migration stream errored");
 migrate_set_state(>state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
   MIGRATION_STATUS_FAILED);
+migrate_set_error(ms, local_err);
 }
 
 trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
@@ -3233,7 +3239,9 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
 if (migrate_postcopy_ram() || migrate_return_path()) {
 if (open_return_path_on_source(s, !resume)) {
 error_report("Unable to open return-path for postcopy");
+error_setg(_err, "Unable to open return-path");
 migrate_set_state(>state, s->state, MIGRATION_STATUS_FAILED);
+migrate_set_error(s, local_err);
 migrate_fd_cleanup(s);
 return;
 }
-- 
2.22.3




[RFC 0/1] migration: Update error description whenever migration fails

2023-05-03 Thread tejus.gk
Hi everyone,

Currently, in QEMU, whenever a migration fails, its state is set to
MIGRATION_STATUS_FAILED
via the function migrate_set_state. However, there are places in the
code where the migration is marked as a failed migration; however, the
error description is never updated in the migration state object. This
causes problems when libvirt tries to query for the status of the
migration via a query-migrate; it never receives an error description
and hence reports the error as "unexpectedly failed". This
doesn't give us any information about what actually went wrong with
the migration.

An approach to solve this problem, which this patch explores, is to
update the migration errors through  migrate_set_error, whenever the
migration state is updated to MIGRATION_STATUS_FAILED. However,
sometimes these error descriptions can be due to various reasons or be
too vague.

An alternative approach to tackle this is to update the error
description from the point where the error actually occurred. For
instance, an error which occurs while saving the vmstate in the function
vmstate_save_state_v in the file migration/vmstate.c, results in a
failed migration, hence the error description can be updated here
itself, rather than updating it in the function migration_completion,
present in migration/migration.c.

tejus.gk (1):
  migration: Update error description whenever migration fails

 migration/migration.c | 8 
 1 file changed, 8 insertions(+)

-- 
2.22.3