Peter Xu <pet...@redhat.com> writes:

> On Mon, Dec 02, 2024 at 07:01:36PM -0300, Fabiano Rosas wrote:
>> If the destination side fails at migration_ioc_process_incoming()
>> before starting the coroutine, it will report the error but QEMU will
>> not exit.
>> 
>> Set the migration state to FAILED and exit the process if
>> exit-on-error allows.
>> 
>> CC: Thomas Huth <th...@redhat.com>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2633
>> Reported-by: Daniel P. Berrangé <berra...@redhat.com>
>> Signed-off-by: Fabiano Rosas <faro...@suse.de>
>
> (I skipped the postcopy patches as of now, until we finish the discussion
>  in patch 2)
>
>> ---
>>  migration/channel.c   | 11 ++++++-----
>>  migration/migration.c | 31 ++++++++++++++++++-------------
>>  migration/migration.h |  2 +-
>>  3 files changed, 25 insertions(+), 19 deletions(-)
>> 
>> diff --git a/migration/channel.c b/migration/channel.c
>> index f9de064f3b..6d7f9172d8 100644
>> --- a/migration/channel.c
>> +++ b/migration/channel.c
>> @@ -40,13 +40,14 @@ void migration_channel_process_incoming(QIOChannel *ioc)
>>  
>>      if (migrate_channel_requires_tls_upgrade(ioc)) {
>>          migration_tls_channel_process_incoming(s, ioc, &local_err);
>> +
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +        }
>
> What if tls processing failed here, do we have similar issue that qemu will
> stall?  Do we want to cover that too?
>

Hmm, I think I confused this with the multifd_channel_connect() stuff
where the code is reentrant and we take the "regular" path when called a
second time. I need to look closer into this part.

>> +
>>      } else {
>>          migration_ioc_register_yank(ioc);
>> -        migration_ioc_process_incoming(ioc, &local_err);
>> -    }
>> -
>> -    if (local_err) {
>> -        error_report_err(local_err);
>> +        migration_ioc_process_incoming(ioc);
>>      }
>>  }
>>  
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 8a61cc26d7..cd88ebc875 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -943,7 +943,7 @@ static bool migration_should_start_incoming(bool 
>> main_channel)
>>      return true;
>>  }
>>  
>> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>> +void migration_ioc_process_incoming(QIOChannel *ioc)
>>  {
>>      MigrationIncomingState *mis = migration_incoming_get_current();
>>      Error *local_err = NULL;
>> @@ -966,10 +966,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
>> Error **errp)
>>           * issue is not possible.
>>           */
>>          ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
>> -                                          sizeof(channel_magic), errp);
>> -
>> +                                          sizeof(channel_magic), 
>> &local_err);
>>          if (ret != 0) {
>> -            return;
>> +            goto err;
>>          }
>>  
>>          default_channel = (channel_magic == 
>> cpu_to_be32(QEMU_VM_FILE_MAGIC));
>> @@ -977,8 +976,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
>> Error **errp)
>>          default_channel = !mis->from_src_file;
>>      }
>>  
>> -    if (multifd_recv_setup(errp) != 0) {
>> -        return;
>> +    if (multifd_recv_setup(&local_err) != 0) {
>> +        goto err;
>>      }
>>  
>>      if (default_channel) {
>> @@ -995,18 +994,24 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
>> Error **errp)
>>              postcopy_preempt_new_channel(mis, f);
>>          }
>>          if (local_err) {
>> -            error_propagate(errp, local_err);
>> -            return;
>> +            goto err;
>>          }
>>      }
>>  
>> -    if (migration_should_start_incoming(default_channel)) {
>> -        /* If it's a recovery, we're done */
>> -        if (postcopy_try_recover()) {
>> -            return;
>> -        }
>> +    if (migration_should_start_incoming(default_channel) &&
>> +        !postcopy_try_recover()) {
>>          migration_incoming_process();
>>      }
>> +
>> +    return;
>> +
>> +err:
>> +    error_report_err(local_err);
>> +    migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
>> +                      MIGRATION_STATUS_FAILED);
>> +    if (mis->exit_on_error) {
>> +        exit(EXIT_FAILURE);
>> +    }
>>  }
>>  
>>  /**
>> diff --git a/migration/migration.h b/migration/migration.h
>> index 0956e9274b..c367e5ea40 100644
>> --- a/migration/migration.h
>> +++ b/migration/migration.h
>> @@ -477,7 +477,7 @@ void migrate_set_state(MigrationStatus *state, 
>> MigrationStatus old_state,
>>                         MigrationStatus new_state);
>>  
>>  void migration_fd_process_incoming(QEMUFile *f);
>> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
>> +void migration_ioc_process_incoming(QIOChannel *ioc);
>>  void migration_incoming_process(void);
>>  
>>  bool  migration_has_all_channels(void);
>> -- 
>> 2.35.3
>> 

Reply via email to