On Fri, Nov 16, 2018 at 11:57:27AM +0100, Cédric Le Goater wrote: > The interrupt mode is chosen by the CAS negotiation process and > activated after a reset to take into account the required changes in > the machine. This brings new constraints on how the associated KVM IRQ > device is initialized. > > Currently, each model takes care of the initialization of the KVM > device in their realize method but this is not possible anymore as the > initialization needs to done globaly when the interrupt mode is known, > i.e. when machine is reseted. It also means that we need a way to > delete a KVM device when another mode is chosen. > > Also, to support migration, the QEMU objects holding the state to > transfer should always be available but not necessarily activated. > > The overall approach of this proposal is to initialize both interrupt > mode at the QEMU level and keep the IRQ number space in sync to allow > switching from one mode to another. For the KVM side of things, the > whole initialization of the KVM device, sources and presenters, is > grouped in a single routine. The XICS and XIVE sPAPR IRQ reset > handlers are modified accordingly to handle the init and delete > sequences of the KVM device. The post_load handlers also are, to take > into account a possible change of interrupt mode after transfer. > > As KVM is now initialized at reset, we loose the possiblity to > fallback to the QEMU emulated mode in case of failure and failures > become fatal to the machine. > > Signed-off-by: Cédric Le Goater <c...@kaod.org> > --- > hw/intc/spapr_xive_kvm.c | 48 +++++++++++----------- > hw/intc/xics_kvm.c | 18 ++++++--- > hw/ppc/spapr_irq.c | 86 +++++++++++++++++++++++++++++----------- > 3 files changed, 98 insertions(+), 54 deletions(-) > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > index 0672d8bcbc6b..9c7d36f51e3d 100644 > --- a/hw/intc/spapr_xive_kvm.c > +++ b/hw/intc/spapr_xive_kvm.c > @@ -148,7 +148,6 @@ static void xive_tctx_kvm_init(XiveTCTX *tctx, Error > **errp) > > static void xive_tctx_kvm_realize(DeviceState *dev, Error **errp) > { > - XiveTCTX *tctx = XIVE_TCTX_KVM(dev); > XiveTCTXClass *xtc = XIVE_TCTX_BASE_GET_CLASS(dev); > Error *local_err = NULL; > > @@ -157,12 +156,6 @@ static void xive_tctx_kvm_realize(DeviceState *dev, > Error **errp) > error_propagate(errp, local_err); > return; > } > - > - xive_tctx_kvm_init(tctx, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > } > > static void xive_tctx_kvm_class_init(ObjectClass *klass, void *data) > @@ -222,12 +215,9 @@ static void xive_source_kvm_init(XiveSource *xsrc, Error > **errp) > > static void xive_source_kvm_reset(DeviceState *dev) > { > - XiveSource *xsrc = XIVE_SOURCE_KVM(dev); > XiveSourceClass *xsc = XIVE_SOURCE_BASE_GET_CLASS(dev); > > xsc->parent_reset(dev); > - > - xive_source_kvm_init(xsrc, &error_fatal); > } > > /* > @@ -346,12 +336,6 @@ static void xive_source_kvm_realize(DeviceState *dev, > Error **errp) > > xsrc->qirqs = qemu_allocate_irqs(xive_source_kvm_set_irq, xsrc, > xsrc->nr_irqs); > - > - xive_source_kvm_mmap(xsrc, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > } > > static void xive_source_kvm_unrealize(DeviceState *dev, Error **errp) > @@ -823,6 +807,7 @@ void spapr_xive_kvm_init(sPAPRXive *xive, Error **errp) > { > Error *local_err = NULL; > size_t tima_len; > + CPUState *cs; > > if (!kvm_enabled() || !kvmppc_has_cap_xive()) { > error_setg(errp, > @@ -850,7 +835,18 @@ void spapr_xive_kvm_init(sPAPRXive *xive, Error **errp) > return; > } > > - /* Let the XiveSource KVM model handle the mapping for the moment */ > + xive_source_kvm_mmap(&xive->source, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + > + /* Create the KVM interrupt sources */ > + xive_source_kvm_init(&xive->source, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > > /* TIMA KVM mapping > * > @@ -869,6 +865,17 @@ void spapr_xive_kvm_init(sPAPRXive *xive, Error **errp) > "xive.tima", tima_len, xive->tm_mmap); > sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); > > + /* Connect the presenters to the VCPU */ > + CPU_FOREACH(cs) { > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + > + xive_tctx_kvm_init(XIVE_TCTX_BASE(cpu->intc), &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } > + > kvm_kernel_irqchip = true; > kvm_msi_via_irqfd_allowed = true; > kvm_gsi_direct_mapping = true; > @@ -920,16 +927,9 @@ void spapr_xive_kvm_fini(sPAPRXive *xive, Error **errp) > > static void spapr_xive_kvm_realize(DeviceState *dev, Error **errp) > { > - sPAPRXive *xive = SPAPR_XIVE_KVM(dev); > sPAPRXiveClass *sxc = SPAPR_XIVE_BASE_GET_CLASS(dev); > Error *local_err = NULL; > > - spapr_xive_kvm_init(xive, &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > - > /* Initialize the source and the local routing tables */ > sxc->parent_realize(dev, &local_err); > if (local_err) { > diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c > index a7e3ec32a761..c89fa943847c 100644 > --- a/hw/intc/xics_kvm.c > +++ b/hw/intc/xics_kvm.c > @@ -190,12 +190,6 @@ static void icp_kvm_realize(DeviceState *dev, Error > **errp) > error_propagate(errp, local_err); > return; > } > - > - icp_kvm_init(ICP(dev), &local_err); > - if (local_err) { > - error_propagate(errp, local_err); > - return; > - } > } > > static void icp_kvm_class_init(ObjectClass *klass, void *data) > @@ -427,6 +421,8 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState > *spapr, > int xics_kvm_init(sPAPRMachineState *spapr, Error **errp) > { > int rc; > + CPUState *cs; > + Error *local_err = NULL; > > if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) > { > error_setg(errp, > @@ -475,6 +471,16 @@ int xics_kvm_init(sPAPRMachineState *spapr, Error **errp) > kvm_msi_via_irqfd_allowed = true; > kvm_gsi_direct_mapping = true; > > + /* Connect the presenters to the VCPU */ > + CPU_FOREACH(cs) { > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + > + icp_kvm_init(ICP(cpu->intc), &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + goto fail; > + } > + } > return 0; > > fail: > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c > index 79ead51c630d..f1720a8dda33 100644 > --- a/hw/ppc/spapr_irq.c > +++ b/hw/ppc/spapr_irq.c > @@ -98,20 +98,14 @@ static void spapr_irq_init_xics(sPAPRMachineState *spapr, > int nr_irqs, > MachineState *machine = MACHINE(spapr); > Error *local_err = NULL; > > - if (kvm_enabled()) { > - if (machine_kernel_irqchip_allowed(machine) && > - !xics_kvm_init(spapr, &local_err)) { > - spapr->icp_type = TYPE_KVM_ICP; > - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, > - &local_err); > - } > - if (machine_kernel_irqchip_required(machine) && !spapr->ics) { > - error_prepend(&local_err, > - "kernel_irqchip requested but unavailable: "); > - goto error; > + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { > + spapr->icp_type = TYPE_KVM_ICP; > + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, > + &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > } > - error_free(local_err); > - local_err = NULL; > } > > if (!spapr->ics) { > @@ -119,10 +113,11 @@ static void spapr_irq_init_xics(sPAPRMachineState > *spapr, int nr_irqs, > spapr->icp_type = TYPE_ICP; > spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, > &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > } > - > -error: > - error_propagate(errp, local_err); > } > > #define ICS_IRQ_FREE(ics, srcno) \ > @@ -218,11 +213,28 @@ static int spapr_irq_post_load_xics(sPAPRMachineState > *spapr, int version_id) > > static void spapr_irq_reset_xics(sPAPRMachineState *spapr, Error **errp) > { > + MachineState *machine = MACHINE(spapr); > CPUState *cs; > + Error *local_err = NULL; > > CPU_FOREACH(cs) { > spapr_cpu_core_set_intc(POWERPC_CPU(cs), spapr->icp_type); > } > + > + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
Aren't both devices '_fini'-ed by the machine level reset handler, why does it need a _fini here as well as an init? > + xics_kvm_fini(spapr, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_prepend(errp, "KVM XICS fini failed: "); > + return; > + } > + xics_kvm_init(spapr, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_prepend(errp, "KVM XICS init failed: "); > + return; > + } > + } > } > > #define SPAPR_IRQ_XICS_NR_IRQS 0x1000 > @@ -288,10 +300,8 @@ static void spapr_irq_init_xive(sPAPRMachineState > *spapr, int nr_irqs, > spapr->xive_tctx_type = TYPE_XIVE_TCTX_KVM; > spapr->xive = spapr_xive_create(spapr, TYPE_SPAPR_XIVE_KVM, nr_irqs, > nr_servers, &local_err); > - > - if (local_err && machine_kernel_irqchip_required(machine)) { > + if (local_err) { > error_propagate(errp, local_err); > - error_prepend(errp, "kernel_irqchip requested but init failed : > "); > return; > } > > @@ -375,12 +385,29 @@ static int spapr_irq_post_load_xive(sPAPRMachineState > *spapr, int version_id) > > static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp) > { > + MachineState *machine = MACHINE(spapr); > CPUState *cs; > + Error *local_err = NULL; > > CPU_FOREACH(cs) { > spapr_cpu_core_set_intc(POWERPC_CPU(cs), spapr->xive_tctx_type); > } > > + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { > + spapr_xive_kvm_fini(spapr->xive, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_prepend(errp, "KVM XIVE fini failed: "); > + return; > + } > + spapr_xive_kvm_init(spapr->xive, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_prepend(errp, "KVM XIVE init failed: "); > + return; > + } > + } > + > spapr_xive_mmio_map(spapr->xive); > } > > @@ -432,11 +459,6 @@ static void spapr_irq_init_dual(sPAPRMachineState > *spapr, int nr_irqs, > { > Error *local_err = NULL; > > - if (kvm_enabled()) { > - error_setg(errp, "No KVM support for the 'dual' machine"); > - return; > - } > - > spapr_irq_xics.init(spapr, spapr_irq_xics.nr_irqs, nr_servers, > &local_err); > if (local_err) { > error_propagate(errp, local_err); > @@ -510,10 +532,15 @@ static Object > *spapr_irq_cpu_intc_create_dual(sPAPRMachineState *spapr, > > static int spapr_irq_post_load_dual(sPAPRMachineState *spapr, int version_id) > { > + MachineState *machine = MACHINE(spapr); > + > /* > * Force a reset of the XIVE backend after migration. > */ > if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { > + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { > + xics_kvm_fini(spapr, &error_fatal); > + } > spapr_irq_xive.reset(spapr, &error_fatal); > } > > @@ -522,6 +549,17 @@ static int spapr_irq_post_load_dual(sPAPRMachineState > *spapr, int version_id) > > static void spapr_irq_reset_dual(sPAPRMachineState *spapr, Error **errp) > { > + MachineState *machine = MACHINE(spapr); > + > + /* > + * Destroy all the KVM IRQ devices. This also clears the VCPU > + * presenters > + */ > + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { > + xics_kvm_fini(spapr, &error_fatal); > + spapr_xive_kvm_fini(spapr->xive, &error_fatal); > + } > + > /* > * Only XICS is reseted at startup as it is the default interrupt > * mode. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature