On Thu, 6 Jun 2019 13:06:14 +1000 David Gibson <da...@gibson.dropbear.id.au> wrote:
> On Wed, May 29, 2019 at 11:10:57AM +0530, Aravinda Prasad wrote: > > This patch includes migration support for machine check > > handling. Especially this patch blocks VM migration > > requests until the machine check error handling is > > complete as (i) these errors are specific to the source > > hardware and is irrelevant on the target hardware, > > (ii) these errors cause data corruption and should > > be handled before migration. > > > > Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> > > --- > > hw/ppc/spapr.c | 20 ++++++++++++++++++++ > > hw/ppc/spapr_events.c | 17 +++++++++++++++++ > > hw/ppc/spapr_rtas.c | 4 ++++ > > include/hw/ppc/spapr.h | 2 ++ > > 4 files changed, 43 insertions(+) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index e8a77636..31c4850 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = { > > }, > > }; > > > > +static bool spapr_fwnmi_needed(void *opaque) > > +{ > > + SpaprMachineState *spapr = (SpaprMachineState *)opaque; > > + > > + return (spapr->guest_machine_check_addr == -1) ? 0 : 1; > > Since we're introducing a PAPR capability to enable this, it would > actually be better to check that here, rather than the runtime state. > That leads to less cases and easier to understand semantics for the > migration stream. >
Hmmm... the purpose of needed() VMState callbacks is precisely about runtime state: the subsection should only be migrated if an MCE is pending, i.e. spapr->guest_machine_check_addr != -1.
> > +} > > + > > +static const VMStateDescription vmstate_spapr_machine_check = { > > + .name = "spapr_machine_check", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .needed = spapr_fwnmi_needed, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState), > > + VMSTATE_INT32(mc_status, SpaprMachineState), > > + VMSTATE_END_OF_LIST() > > + }, > > +}; > > + > > static const VMStateDescription vmstate_spapr = { > > .name = "spapr", > > .version_id = 3, > > @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = { > > &vmstate_spapr_dtb, > > &vmstate_spapr_cap_large_decr, > > &vmstate_spapr_cap_ccf_assist, > > + &vmstate_spapr_machine_check, > > NULL > > } > > }; > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > index 573c0b7..35e21e4 100644 > > --- a/hw/ppc/spapr_events.c > > +++ b/hw/ppc/spapr_events.c > > @@ -41,6 +41,7 @@ > > #include "qemu/bcd.h" > > #include "hw/ppc/spapr_ovec.h" > > #include <libfdt.h> > > +#include "migration/blocker.h" > > > > #define RTAS_LOG_VERSION_MASK 0xff000000 > > #define RTAS_LOG_VERSION_6 0x06000000 > > @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, > > bool recovered) > > void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > > { > > SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > + int ret; > > + Error *local_err = NULL; > > + > > + error_setg(&spapr->fwnmi_migration_blocker, > > + "Live migration not supported during machine check handling"); > > + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); > > + if (ret < 0) { > > + /* > > + * We don't want to abort and let the migration to continue. In a > > + * rare case, the machine check handler will run on the target > > + * hardware. Though this is not preferable, it is better than > > aborting > > + * the migration or killing the VM. 
> > + */ > > + error_free(spapr->fwnmi_migration_blocker); > > You should set fwnmi_migration_blocker to NULL here as well. > > As mentioned on an earlier iteration, the migration blocker is the > same every time. Couldn't you just create it once and free at final > teardown, rather than recreating it for every NMI? > > > + warn_report_err(local_err); > > + } > > > > while (spapr->mc_status != -1) { > > /* > > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c > > index 91a7ab9..c849223 100644 > > --- a/hw/ppc/spapr_rtas.c > > +++ b/hw/ppc/spapr_rtas.c > > @@ -50,6 +50,7 @@ > > #include "target/ppc/mmu-hash64.h" > > #include "target/ppc/mmu-book3s-v3.h" > > #include "kvm_ppc.h" > > +#include "migration/blocker.h" > > > > static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState > > *spapr, > > uint32_t token, uint32_t nargs, > > @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, > > spapr->mc_status = -1; > > qemu_cond_signal(&spapr->mc_delivery_cond); > > rtas_st(rets, 0, RTAS_OUT_SUCCESS); > > + migrate_del_blocker(spapr->fwnmi_migration_blocker); > > + error_free(spapr->fwnmi_migration_blocker); > > + spapr->fwnmi_migration_blocker = NULL; > > } > > } > > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > index bd75d4b..6c0cfd8 100644 > > --- a/include/hw/ppc/spapr.h > > +++ b/include/hw/ppc/spapr.h > > @@ -214,6 +214,8 @@ struct SpaprMachineState { > > SpaprCapabilities def, eff, mig; > > > > unsigned gpu_numa_id; > > + > > + Error *fwnmi_migration_blocker; > > }; > > > > #define H_SUCCESS 0 > > >
pgpRdkiIrBm6H.pgp
Description: OpenPGP digital signature