On 6/29/2022 4:43 PM, Alex Williamson wrote: > On Wed, 15 Jun 2022 07:52:16 -0700 > Steve Sistare <steven.sist...@oracle.com> wrote: > >> Preserve vfio INTX state across cpr restart. Preserve VFIOINTx fields as >> follows: >> pin : Recover this from the vfio config in kernel space >> interrupt : Preserve its eventfd descriptor across exec. >> unmask : Ditto >> route.irq : This could perhaps be recovered in vfio_pci_post_load by >> calling pci_device_route_intx_to_irq(pin), whose implementation reads >> config space for a bridge device such as ich9. However, there is no >> guarantee that the bridge vmstate is read before vfio vmstate. Rather >> than fiddling with MigrationPriority for vmstate handlers, explicitly >> save route.irq in vfio vmstate. >> pending : save in vfio vmstate. >> mmap_timeout, mmap_timer : Re-initialize >> bool kvm_accel : Re-initialize >> >> In vfio_realize, defer calling vfio_intx_enable until the vmstate >> is available, in vfio_pci_post_load. Modify vfio_intx_enable and >> vfio_intx_kvm_enable to skip vfio initialization, but still perform >> kvm initialization. >> >> Signed-off-by: Steve Sistare <steven.sist...@oracle.com> >> --- >> hw/vfio/pci.c | 92 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++------ >> 1 file changed, 83 insertions(+), 9 deletions(-) >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index 2fd7121..b8aee91 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -173,14 +173,45 @@ static void vfio_intx_eoi(VFIODevice *vbasedev) >> vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); >> } >> >> +#ifdef CONFIG_KVM >> +static bool vfio_no_kvm_intx(VFIOPCIDevice *vdev) >> +{ >> + return vdev->no_kvm_intx || !kvm_irqfds_enabled() || >> + vdev->intx.route.mode != PCI_INTX_ENABLED || >> + !kvm_resamplefds_enabled(); >> +} >> +#endif >> + >> +static void vfio_intx_reenable_kvm(VFIOPCIDevice *vdev, Error **errp) >> +{ >> +#ifdef CONFIG_KVM >> + if (vfio_no_kvm_intx(vdev)) { >> + return; >> + } >> + >> + if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) { >> + error_setg(errp, "vfio_notifier_init intx-unmask failed"); >> + return; >> + } >> + >> + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, >> + &vdev->intx.interrupt, >> + &vdev->intx.unmask, >> + vdev->intx.route.irq)) { >> + error_setg_errno(errp, errno, "failed to setup resample irqfd"); > > > Does not unwind with vfio_notifier_cleanup(). This also exactly > duplicates code in vfio_intx_enable_kvm(), which suggests it needs > further refactoring to a common helper.
I will delete vfio_intx_reenable_kvm and add conditionals to vfio_intx_enable_kvm. That looks better. >> + return; >> + } >> + >> + vdev->intx.kvm_accel = true; >> +#endif >> +} >> + >> static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) >> { >> #ifdef CONFIG_KVM >> int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt); >> >> - if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || >> - vdev->intx.route.mode != PCI_INTX_ENABLED || >> - !kvm_resamplefds_enabled()) { >> + if (vfio_no_kvm_intx(vdev)) { >> return; >> } >> >> @@ -328,7 +359,13 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error >> **errp) >> return 0; >> } >> >> - vfio_disable_interrupts(vdev); >> + /* >> + * Do not alter interrupt state during vfio_realize and cpr-load. The >> + * reused flag is cleared thereafter. >> + */ >> + if (!vdev->vbasedev.reused) { >> + vfio_disable_interrupts(vdev); >> + } >> >> vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ >> pci_config_set_interrupt_pin(vdev->pdev.config, pin); >> @@ -353,6 +390,11 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error >> **errp) >> fd = event_notifier_get_fd(&vdev->intx.interrupt); >> qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev); >> >> + if (vdev->vbasedev.reused) { >> + vfio_intx_reenable_kvm(vdev, &err); >> + goto finish; >> + } >> + > > This only jumps over the vfio_set_irq_signaling() and > vfio_intx_enable_kvm(), largely replacing the latter with chunks of > code taken from it. Doesn't seem like the right factoring. Cleaned up in the next version. >> if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, >> VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { >> qemu_set_fd_handler(fd, NULL, NULL, vdev); >> @@ -365,6 +407,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error >> **errp) >> warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); >> } >> >> +finish: >> vdev->interrupt = VFIO_INT_INTx; >> >> trace_vfio_intx_enable(vdev->vbasedev.name); >> @@ -3195,9 +3238,13 @@ static void vfio_realize(PCIDevice *pdev, Error >> **errp) >> vfio_intx_routing_notifier); >> vdev->irqchip_change_notifier.notify = vfio_irqchip_change; >> kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); >> - ret = vfio_intx_enable(vdev, errp); >> - if (ret) { >> - goto out_deregister; >> + >> + /* Wait until cpr-load reads intx routing data to enable */ >> + if (!vdev->vbasedev.reused) { >> + ret = vfio_intx_enable(vdev, errp); >> + if (ret) { >> + goto out_deregister; >> + } >> } >> } >> >> @@ -3474,6 +3521,7 @@ static int vfio_pci_post_load(void *opaque, int >> version_id) >> VFIOPCIDevice *vdev = opaque; >> PCIDevice *pdev = &vdev->pdev; >> int nr_vectors; >> + int ret = 0; >> >> if (msix_enabled(pdev)) { >> msix_set_vector_notifiers(pdev, vfio_msix_vector_use, >> @@ -3486,10 +3534,35 @@ static int vfio_pci_post_load(void *opaque, int >> version_id) >> vfio_claim_vectors(vdev, nr_vectors, false); >> >> } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { >> - assert(0); /* completed in a subsequent patch */ >> + Error *err = 0; >> + ret = vfio_intx_enable(vdev, &err); >> + if (ret) { >> + error_report_err(err); >> + } >> } >> >> - return 0; >> + return ret; >> +} >> + >> +static const VMStateDescription vfio_intx_vmstate = { >> + .name = "vfio-intx", >> + .unmigratable = 1, > > > unmigratable-vmstates-to-interfere-with-migration+ A bug, will delete. - Steve >> + .version_id = 0, >> + .minimum_version_id = 0, >> + .fields = (VMStateField[]) { >> + VMSTATE_BOOL(pending, VFIOINTx), >> + VMSTATE_UINT32(route.mode, VFIOINTx), >> + VMSTATE_INT32(route.irq, VFIOINTx), >> + VMSTATE_END_OF_LIST() >> + } >> +}; >> + >> +#define VMSTATE_VFIO_INTX(_field, _state) { \ >> + .name = (stringify(_field)), \ >> + .size = sizeof(VFIOINTx), \ >> + .vmsd = &vfio_intx_vmstate, \ >> + .flags = VMS_STRUCT, \ >> + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \ >> } >> >> static bool vfio_pci_needed(void *opaque) >> @@ -3509,6 +3582,7 @@ static const VMStateDescription vfio_pci_vmstate = { >> .fields = (VMStateField[]) { >> VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), >> VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), >> + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), >> VMSTATE_END_OF_LIST() >> } >> }; >