Peter Xu <pet...@redhat.com> writes: > @@ -1882,48 +1870,46 @@ static void *source_return_path_thread(void *opaque) > uint32_t tmp32, sibling_error; > ram_addr_t start = 0; /* =0 to silence warning */ > size_t len = 0, expected_len; > + Error *err = NULL; > int res; > > trace_source_return_path_thread_entry(); > rcu_register_thread(); > > - while (!ms->rp_state.error && !qemu_file_get_error(rp) && > + while (!migrate_has_error(ms) && !qemu_file_get_error(rp) && > migration_is_setup_or_active(ms->state)) { > trace_source_return_path_thread_loop_top(); > + > header_type = qemu_get_be16(rp); > header_len = qemu_get_be16(rp); > > if (qemu_file_get_error(rp)) { > - mark_source_rp_bad(ms); > goto out; > }
This error will be lost because outside the loop we only check for err. > > if (header_type >= MIG_RP_MSG_MAX || > header_type == MIG_RP_MSG_INVALID) { > - error_report("RP: Received invalid message 0x%04x length 0x%04x", > - header_type, header_len); > - mark_source_rp_bad(ms); > + error_setg(&err, "Received invalid message 0x%04x length 0x%04x", > + header_type, header_len); > goto out; > } > > if ((rp_cmd_args[header_type].len != -1 && > header_len != rp_cmd_args[header_type].len) || > header_len > sizeof(buf)) { > - error_report("RP: Received '%s' message (0x%04x) with" > - "incorrect length %d expecting %zu", > - rp_cmd_args[header_type].name, header_type, > header_len, > - (size_t)rp_cmd_args[header_type].len); > - mark_source_rp_bad(ms); > + error_setg(&err, "Received '%s' message (0x%04x) with" > + "incorrect length %d expecting %zu", > + rp_cmd_args[header_type].name, header_type, > header_len, > + (size_t)rp_cmd_args[header_type].len); > goto out; > } > > /* We know we've got a valid header by this point */ > res = qemu_get_buffer(rp, buf, header_len); > if (res != header_len) { > - error_report("RP: Failed reading data for message 0x%04x" > - " read %d expected %d", > - header_type, res, header_len); > - mark_source_rp_bad(ms); > + error_setg(&err, "Failed reading data for message 0x%04x" > + " read %d expected %d", > + header_type, res, header_len); > goto out; > } > > @@ -1933,8 +1919,7 @@ static void *source_return_path_thread(void *opaque) > sibling_error = ldl_be_p(buf); > trace_source_return_path_thread_shut(sibling_error); > if (sibling_error) { > - error_report("RP: Sibling indicated error %d", > sibling_error); > - mark_source_rp_bad(ms); > + error_setg(&err, "Sibling indicated error %d", > sibling_error); > } > /* > * We'll let the main thread deal with closing the RP > @@ -1952,7 +1937,10 @@ static void *source_return_path_thread(void *opaque) > case MIG_RP_MSG_REQ_PAGES: > start = ldq_be_p(buf); > len = ldl_be_p(buf + 8); > - migrate_handle_rp_req_pages(ms, NULL, start, len); > + migrate_handle_rp_req_pages(ms, NULL, start, len, &err); > + if (err) { > + goto out; > + } > break; > > case MIG_RP_MSG_REQ_PAGES_ID: > @@ -1967,32 +1955,32 @@ static void *source_return_path_thread(void *opaque) > expected_len += tmp32; > } > if (header_len != expected_len) { > - error_report("RP: Req_Page_id with length %d expecting %zd", > - header_len, expected_len); > - mark_source_rp_bad(ms); > + error_setg(&err, "Req_Page_id with length %d expecting %zd", > + header_len, expected_len); > + goto out; > + } > + migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len, > + &err); > + if (err) { > goto out; > } > - migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); > break; > > case MIG_RP_MSG_RECV_BITMAP: > if (header_len < 1) { > - error_report("%s: missing block name", __func__); > - mark_source_rp_bad(ms); > + error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block > name"); > goto out; > } > /* Format: len (1B) + idstr (<255B). This ends the idstr. */ > buf[buf[0] + 1] = '\0'; > - if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { > - mark_source_rp_bad(ms); > + if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { > goto out; > } > break; > > case MIG_RP_MSG_RESUME_ACK: > tmp32 = ldl_be_p(buf); > - if (migrate_handle_rp_resume_ack(ms, tmp32)) { > - mark_source_rp_bad(ms); > + if (migrate_handle_rp_resume_ack(ms, tmp32, &err)) { > goto out; > } > break; > @@ -2008,9 +1996,14 @@ static void *source_return_path_thread(void *opaque) > } > > out: > - if (qemu_file_get_error(rp)) { > + if (err) { Need to keep both checks here. > + /* > + * Collect any error in return-path thread and report it to the > + * migration state object. > + */ > + migrate_set_error(ms, err); > + error_free(err); > trace_source_return_path_thread_bad_end(); > - mark_source_rp_bad(ms); > } > > trace_source_return_path_thread_end(); > @@ -2036,13 +2029,10 @@ static int open_return_path_on_source(MigrationState > *ms) > return 0; > }