On Thu, Feb 17, 2022 at 02:49:02AM -0500, Jagannathan Raman wrote: > Forward remote device's interrupts to the guest > > Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> > Signed-off-by: John G Johnson <john.g.john...@oracle.com> > Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> > --- > include/hw/pci/pci.h | 6 ++ > include/hw/remote/vfio-user-obj.h | 6 ++ > hw/pci/msi.c | 13 +++- > hw/pci/msix.c | 12 +++- > hw/remote/machine.c | 11 +-- > hw/remote/vfio-user-obj.c | 107 ++++++++++++++++++++++++++++++ > stubs/vfio-user-obj.c | 6 ++ > MAINTAINERS | 1 + > hw/remote/trace-events | 1 + > stubs/meson.build | 1 + > 10 files changed, 158 insertions(+), 6 deletions(-) > create mode 100644 include/hw/remote/vfio-user-obj.h > create mode 100644 stubs/vfio-user-obj.c > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index c3f3c90473..d42d526a48 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -129,6 +129,8 @@ typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, > typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, > pcibus_t addr, pcibus_t size, int type); > typedef void PCIUnregisterFunc(PCIDevice *pci_dev); > +typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector); > +typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector); > > typedef struct PCIIORegion { > pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ > @@ -323,6 +325,10 @@ struct PCIDevice { > /* Space to store MSIX table & pending bit array */ > uint8_t *msix_table; > uint8_t *msix_pba; > + > + PCIMSINotify *msi_notify; > + PCIMSIxNotify *msix_notify; > + > /* MemoryRegion container for msix exclusive BAR setup */ > MemoryRegion msix_exclusive_bar; > /* Memory Regions for MSIX table and pending bit entries. */ > diff --git a/include/hw/remote/vfio-user-obj.h > b/include/hw/remote/vfio-user-obj.h > new file mode 100644 > index 0000000000..87ab78b875 > --- /dev/null > +++ b/include/hw/remote/vfio-user-obj.h > @@ -0,0 +1,6 @@ > +#ifndef VFIO_USER_OBJ_H > +#define VFIO_USER_OBJ_H > + > +void vfu_object_set_bus_irq(PCIBus *pci_bus); > + > +#endif > diff --git a/hw/pci/msi.c b/hw/pci/msi.c > index 47d2b0f33c..93f5e400cc 100644 > --- a/hw/pci/msi.c > +++ b/hw/pci/msi.c > @@ -51,6 +51,8 @@ > */ > bool msi_nonbroken; > > +static void pci_msi_notify(PCIDevice *dev, unsigned int vector); > + > /* If we get rid of cap allocator, we won't need this. */ > static inline uint8_t msi_cap_sizeof(uint16_t flags) > { > @@ -225,6 +227,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, > dev->msi_cap = config_offset; > dev->cap_present |= QEMU_PCI_CAP_MSI; > > + dev->msi_notify = pci_msi_notify;
Are you sure it's correct to skip the msi_is_masked() logic? I think the callback function should only override the behavior of msi_send_message(), not the entire msi_notify() function. The same applies to MSI-X. > + > pci_set_word(dev->config + msi_flags_off(dev), flags); > pci_set_word(dev->wmask + msi_flags_off(dev), > PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); > @@ -307,7 +311,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int > vector) > return mask & (1U << vector); > } > > -void msi_notify(PCIDevice *dev, unsigned int vector) > +static void pci_msi_notify(PCIDevice *dev, unsigned int vector) > { > uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; > @@ -332,6 +336,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector) > msi_send_message(dev, msg); > } > > +void msi_notify(PCIDevice *dev, unsigned int vector) > +{ > + if (dev->msi_notify) { Can this ever be NULL? > + dev->msi_notify(dev, vector); > + } > +} > + > void msi_send_message(PCIDevice *dev, MSIMessage msg) > { > MemTxAttrs attrs = {}; > diff --git a/hw/pci/msix.c b/hw/pci/msix.c > index ae9331cd0b..1c71e67f53 100644 > --- a/hw/pci/msix.c > +++ b/hw/pci/msix.c > @@ -31,6 +31,8 @@ > #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) > #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) > > +static void pci_msix_notify(PCIDevice *dev, unsigned vector); > + > MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) > { > uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; > @@ -334,6 +336,7 @@ int msix_init(struct PCIDevice *dev, unsigned short > nentries, > dev->msix_table = g_malloc0(table_size); > dev->msix_pba = g_malloc0(pba_size); > dev->msix_entry_used = g_malloc0(nentries * sizeof > *dev->msix_entry_used); > + dev->msix_notify = pci_msix_notify; > > msix_mask_all(dev, nentries); > > @@ -485,7 +488,7 @@ int msix_enabled(PCIDevice *dev) > } > > /* Send an MSI-X message */ > -void msix_notify(PCIDevice *dev, unsigned vector) > +static void pci_msix_notify(PCIDevice *dev, unsigned vector) > { > MSIMessage msg; > > @@ -503,6 +506,13 @@ void msix_notify(PCIDevice *dev, unsigned vector) > msi_send_message(dev, msg); > } > > +void msix_notify(PCIDevice *dev, unsigned vector) > +{ > + if (dev->msix_notify) { Can this ever be NULL? > + dev->msix_notify(dev, vector); > + } > +} > + > void msix_reset(PCIDevice *dev) > { > if (!msix_present(dev)) { > diff --git a/hw/remote/machine.c b/hw/remote/machine.c > index db4ae30710..a8b4a3aef3 100644 > --- a/hw/remote/machine.c > +++ b/hw/remote/machine.c > @@ -23,6 +23,7 @@ > #include "hw/remote/iohub.h" > #include "hw/qdev-core.h" > #include "hw/remote/iommu.h" > +#include "hw/remote/vfio-user-obj.h" > > static void remote_machine_init(MachineState *machine) > { > @@ -54,12 +55,14 @@ static void remote_machine_init(MachineState *machine) > > if (s->vfio_user) { > remote_configure_iommu(pci_host->bus); > - } > > - remote_iohub_init(&s->iohub); > + vfu_object_set_bus_irq(pci_host->bus); > + } else { > + remote_iohub_init(&s->iohub); > > - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, > - &s->iohub, REMOTE_IOHUB_NB_PIRQS); > + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, > remote_iohub_map_irq, > + &s->iohub, REMOTE_IOHUB_NB_PIRQS); > + } > > qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); > } > diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c > index 2feabd06a4..d79bab87f1 100644 > --- a/hw/remote/vfio-user-obj.c > +++ b/hw/remote/vfio-user-obj.c > @@ -54,6 +54,9 @@ > #include "hw/pci/pci.h" > #include "qemu/timer.h" > #include "exec/memory.h" > +#include "hw/pci/msi.h" > +#include "hw/pci/msix.h" > +#include "hw/remote/vfio-user-obj.h" > > #define TYPE_VFU_OBJECT "x-vfio-user-server" > OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) > @@ -107,6 +110,10 @@ struct VfuObject { > int vfu_poll_fd; > }; > > +static GHashTable *vfu_object_bdf_to_ctx_table; I suggest adding a void *msi_notify_opaque field to PCIDevice and passing the value as an argument to ->msi_notify(). vfio-user-obj.c can set the value to vfu_ctx and eliminate the vfu_object_bdf_to_ctx_table hash table. This simplifies the code, makes it faster, and solves race conditions during hot plug/unplug if other instances are running in IOThreads. > + > +#define INT2VOIDP(i) (void *)(uintptr_t)(i) > + > static void vfu_object_init_ctx(VfuObject *o, Error **errp); > > static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, > @@ -463,6 +470,86 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, > PCIDevice *pdev) > } > } > > +static void vfu_object_irq_trigger(int pci_bdf, unsigned vector) > +{ > + vfu_ctx_t *vfu_ctx = NULL; > + > + if (!vfu_object_bdf_to_ctx_table) { Can this ever be NULL? > + return; > + } > + > + vfu_ctx = g_hash_table_lookup(vfu_object_bdf_to_ctx_table, > + INT2VOIDP(pci_bdf)); > + > + if (vfu_ctx) { > + vfu_irq_trigger(vfu_ctx, vector); > + } > +} > + > +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) > +{ > + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), > + pci_dev->devfn); > + > + return pci_bdf; > +} > + > +static void vfu_object_set_irq(void *opaque, int pirq, int level) > +{ > + if (level) { > + vfu_object_irq_trigger(pirq, 0); > + } > +} > + > +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector) > +{ > + int pci_bdf; > + > + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), > pci_dev->devfn); > + > + vfu_object_irq_trigger(pci_bdf, vector); > +} > + > +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) > +{ > + vfu_ctx_t *vfu_ctx = o->vfu_ctx; > + int ret, pci_bdf; > + > + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); > + if (ret < 0) { > + return ret; > + } > + > + ret = 0; > + if (msix_nr_vectors_allocated(pci_dev)) { > + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, > + msix_nr_vectors_allocated(pci_dev)); > + > + pci_dev->msix_notify = vfu_object_msi_notify; > + } else if (msi_nr_vectors_allocated(pci_dev)) { > + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, > + msi_nr_vectors_allocated(pci_dev)); > + > + pci_dev->msi_notify = vfu_object_msi_notify; > + } > + > + if (ret < 0) { > + return ret; > + } > + > + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), > pci_dev->devfn); > + > + g_hash_table_insert(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf), > + o->vfu_ctx); > + > + return 0; > +} > + > +void vfu_object_set_bus_irq(PCIBus *pci_bus) > +{ > + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, NULL, 1); > +} > + > /* > * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' > * properties. It also depends on devices instantiated in QEMU. These > @@ -559,6 +646,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error > **errp) > > vfu_object_register_bars(o->vfu_ctx, o->pci_dev); > > + ret = vfu_object_setup_irqs(o, o->pci_dev); > + if (ret < 0) { > + error_setg(errp, "vfu: Failed to setup interrupts for %s", > + o->device); > + goto fail; > + } > + > ret = vfu_realize_ctx(o->vfu_ctx); > if (ret < 0) { > error_setg(errp, "vfu: Failed to realize device %s- %s", > @@ -612,6 +706,7 @@ static void vfu_object_finalize(Object *obj) > { > VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); > VfuObject *o = VFU_OBJECT(obj); > + int pci_bdf; > > k->nr_devs--; > > @@ -638,9 +733,17 @@ static void vfu_object_finalize(Object *obj) > o->unplug_blocker = NULL; > } > > + if (o->pci_dev) { > + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(o->pci_dev)), > + o->pci_dev->devfn); > + g_hash_table_remove(vfu_object_bdf_to_ctx_table, INT2VOIDP(pci_bdf)); > + } > + > o->pci_dev = NULL; > > if (!k->nr_devs && k->auto_shutdown) { > + g_hash_table_destroy(vfu_object_bdf_to_ctx_table); > + vfu_object_bdf_to_ctx_table = NULL; > qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); > } > > @@ -658,6 +761,10 @@ static void vfu_object_class_init(ObjectClass *klass, > void *data) > > k->auto_shutdown = true; > > + msi_nonbroken = true; This should go in hw/remote/machine.c. It's a global variable related to the machine's interrupt controller capabilities. The value is not related to vfu_object_class_init(), which will be called by any QEMU binary that links hw/remote/vfio-user-obj.o regardless of which machine type is instantiated. > + > + vfu_object_bdf_to_ctx_table = g_hash_table_new_full(NULL, NULL, NULL, > NULL); > + > object_class_property_add(klass, "socket", "SocketAddress", NULL, > vfu_object_set_socket, NULL, NULL); > object_class_property_set_description(klass, "socket", > diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c > new file mode 100644 > index 0000000000..79100d768e > --- /dev/null > +++ b/stubs/vfio-user-obj.c > @@ -0,0 +1,6 @@ > +#include "qemu/osdep.h" > +#include "hw/remote/vfio-user-obj.h" > + > +void vfu_object_set_bus_irq(PCIBus *pci_bus) > +{ > +} > diff --git a/MAINTAINERS b/MAINTAINERS > index f47232c78c..e274cb46af 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -3569,6 +3569,7 @@ F: hw/remote/iohub.c > F: include/hw/remote/iohub.h > F: subprojects/libvfio-user > F: hw/remote/vfio-user-obj.c > +F: include/hw/remote/vfio-user-obj.h > F: hw/remote/iommu.c > F: include/hw/remote/iommu.h > > diff --git a/hw/remote/trace-events b/hw/remote/trace-events > index 847d50d88f..c167b3c7a5 100644 > --- a/hw/remote/trace-events > +++ b/hw/remote/trace-events > @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA > 0x%"PRIx64"" > vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr > 0x%"PRIx64" size 0x%"PRIx64"" > vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR > address 0x%"PRIx64"" > vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR > address 0x%"PRIx64"" > +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d" > diff --git a/stubs/meson.build b/stubs/meson.build > index d359cbe1ad..c5ce979dc3 100644 > --- a/stubs/meson.build > +++ b/stubs/meson.build > @@ -57,3 +57,4 @@ if have_system > else > stub_ss.add(files('qdev.c')) > endif > +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: > files('vfio-user-obj.c')) > -- > 2.20.1 >
signature.asc
Description: PGP signature