On Wed, Aug 31, 2016 at 02:13:09PM -0600, Alex Williamson wrote: > On Tue, 19 Jul 2016 15:38:28 +0800 > Zhou Jie <zhoujie2...@cn.fujitsu.com> wrote: > > > From: Chen Fan <chen.fan.f...@cn.fujitsu.com> > > > > For supporting aer recovery, host and guest would run the same aer > > recovery code, that would do the secondary bus reset if the error > > is fatal, the aer recovery process: > > 1. error_detected > > 2. reset_link (if fatal) > > 3. slot_reset/mmio_enabled > > 4. resume > > > > It indicates that host will do secondary bus reset to reset > > the physical devices under bus in step 2, that would cause > > devices in D3 status in a short time. But in qemu, we register > > an error detected handler, that would be invoked as host broadcasts > > the error-detected event in step 1, in order to avoid guest do > > reset_link when host do reset_link simultaneously. it may cause > > fatal error. we poll the vfio_device_info to assure host reset > > completely. > > In qemu, the aer recovery process: > > 1. Detect support for aer error progress > > If host vfio driver does not support for aer error progress, > > directly fail to boot up VM as with aer enabled. > > 2. Immediately notify the VM on error detected. > > 3. Wait for host aer error progress > > Poll the vfio_device_info, If it is still in aer error progress after > > some timeout, we would abort the guest directed bus reset > > altogether and unplug of the device to prevent it from further > > interacting with the VM. > > 4. Reset bus. > > > > Signed-off-by: Chen Fan <chen.fan.f...@cn.fujitsu.com> > > Signed-off-by: Zhou Jie <zhoujie2...@cn.fujitsu.com> > > --- > > hw/vfio/pci.c | 51 > > +++++++++++++++++++++++++++++++++++++++++++++- > > hw/vfio/pci.h | 1 + > > linux-headers/linux/vfio.h | 4 ++++ > > 3 files changed, 55 insertions(+), 1 deletion(-) > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index 0e42786..777245c 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -35,6 +35,12 @@ > > > > #define MSIX_CAP_LENGTH 12 > > > > +/* > > + * Timeout for waiting host aer error process, it is 3 seconds. > > + * For hardware bus reset 3 seconds will be enough. > > + */ > > +#define PCI_AER_PROCESS_TIMEOUT 3000000 > > Why is 3 seconds "enough"? What considerations went into determining > this that would need to be re-evaluated if we ever want to change it? > 24 hours is enough, but why was 3 seconds chosen over 24 hours? Why > would 2 seconds be a worse choice? 1?
And just to clarify, the answer belongs in a code comment and possibly commit log, not just in an email response. > > + > > static void vfio_disable_interrupts(VFIOPCIDevice *vdev); > > static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); > > > > @@ -1913,6 +1919,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice > > *vdev, Error **errp) > > VFIOGroup *group; > > int ret, i, devfn, range_limit; > > > > + if (!(vdev->vbasedev.flags & VFIO_DEVICE_FLAGS_AERPROCESS)) { > > + error_setg(errp, "vfio: Cannot enable AER for device %s," > > + " host vfio driver does not support for" > > + " aer error progress", > > + vdev->vbasedev.name); > > + return; > > + } > > + > > ret = vfio_get_hot_reset_info(vdev, &info); > > if (ret) { > > error_setg(errp, "vfio: Cannot enable AER for device %s," > > @@ -2679,6 +2693,11 @@ static void vfio_err_notifier_handler(void *opaque) > > msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN : > > PCI_ERR_ROOT_CMD_NONFATAL_EN; > > > > + if (isfatal) { > > + PCIDevice *dev_0 = pci_get_function_0(dev); > > + VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0); > > + vdev_0->pci_aer_error_signaled = true; > > + } > > pcie_aer_msg(dev, &msg); > > return; > > } > > @@ -3163,6 +3182,19 @@ static void vfio_exitfn(PCIDevice *pdev) > > vfio_bars_exit(vdev); > > } > > > > +static int vfio_aer_error_is_in_process(VFIOPCIDevice *vdev) > > +{ > > + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; > > + int ret; > > + > > + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info); > > + if (ret) { > > + error_report("vfio: error getting device info: %m"); > > + return ret; > > + } > > + return dev_info.flags & VFIO_DEVICE_FLAGS_INAERPROCESS ? 1 : 0; > > +} > > + > > static void vfio_pci_reset(DeviceState *dev) > > { > > PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); > > @@ -3176,7 +3208,24 @@ static void vfio_pci_reset(DeviceState *dev) > > if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) & > > PCI_BRIDGE_CTL_BUS_RESET)) { > > if (pci_get_function_0(pdev) == pdev) { > > - vfio_pci_hot_reset(vdev, vdev->single_depend_dev); > > + if (!vdev->pci_aer_error_signaled) { > > + vfio_pci_hot_reset(vdev, vdev->single_depend_dev); > > + } else { > > + int i; > > + for (i = 0; i < 1000; i++) { > > + if (!vfio_aer_error_is_in_process(vdev)) { > > + break; > > + } > > + g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000); > > + } > > + > > + if (i == 1000) { > > + qdev_unplug(&vdev->pdev.qdev, NULL); > > This looks a bit precarious to me, but I guess in the end we're only > really sending an attention button press on the hotplug slot. Have you > forced this code path in testing and does the right thing happen for > both Windows and Linux guests? If all else fails, require management to specify the timeout. > > + } else { > > + vfio_pci_hot_reset(vdev, vdev->single_depend_dev); > > + } > > + vdev->pci_aer_error_signaled = false; > > + } > > } > > return; > > } > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > > index ed14322..c9e0202 100644 > > --- a/hw/vfio/pci.h > > +++ b/hw/vfio/pci.h > > @@ -148,6 +148,7 @@ typedef struct VFIOPCIDevice { > > bool no_kvm_msi; > > bool no_kvm_msix; > > bool single_depend_dev; > > + bool pci_aer_error_signaled; > > } VFIOPCIDevice; > > > > uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h > > index 759b850..9295fca 100644 > > --- a/linux-headers/linux/vfio.h > > +++ b/linux-headers/linux/vfio.h > > @@ -198,6 +198,10 @@ struct vfio_device_info { > > #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ > > #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device > > */ > > #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ > > +/* support aer error progress */ > > +#define VFIO_DEVICE_FLAGS_AERPROCESS (1 << 4) > > +/* status in aer error progress */ > > +#define VFIO_DEVICE_FLAGS_INAERPROCESS (1 << 5) > > __u32 num_regions; /* Max region index + 1 */ > > __u32 num_irqs; /* Max IRQ index + 1 */ > > };