> On Apr 13, 2022, at 2:24 PM, Jag Raman <jag.ra...@oracle.com> wrote: > > > >> On Apr 13, 2022, at 10:25 AM, Igor Mammedov <imamm...@redhat.com> wrote: >> >> On Fri, 25 Mar 2022 15:19:41 -0400 >> Jagannathan Raman <jag.ra...@oracle.com> wrote: >> >>> Assign separate address space for each device in the remote processes. >>> >>> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> >>> Signed-off-by: John G Johnson <john.g.john...@oracle.com> >>> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> >>> --- >>> include/hw/remote/iommu.h | 18 ++++++++ >>> hw/remote/iommu.c | 95 +++++++++++++++++++++++++++++++++++++++ >>> MAINTAINERS | 2 + >>> hw/remote/meson.build | 1 + >>> 4 files changed, 116 insertions(+) >>> create mode 100644 include/hw/remote/iommu.h >>> create mode 100644 hw/remote/iommu.c >>> >>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h >>> new file mode 100644 >>> index 0000000000..8f850400f1 >>> --- /dev/null >>> +++ b/include/hw/remote/iommu.h >>> @@ -0,0 +1,18 @@ >>> +/** >>> + * Copyright © 2022 Oracle and/or its affiliates. >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>> later. >>> + * See the COPYING file in the top-level directory. >>> + * >>> + */ >>> + >>> +#ifndef REMOTE_IOMMU_H >>> +#define REMOTE_IOMMU_H >>> + >>> +#include "hw/pci/pci_bus.h" >>> + >>> +void remote_configure_iommu(PCIBus *pci_bus); >>> + >>> +void remote_iommu_del_device(PCIDevice *pci_dev); >>> + >>> +#endif >>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c >>> new file mode 100644 >>> index 0000000000..13f329b45d >>> --- /dev/null >>> +++ b/hw/remote/iommu.c >>> @@ -0,0 +1,95 @@ >>> +/** >>> + * IOMMU for remote device >>> + * >>> + * Copyright © 2022 Oracle and/or its affiliates. >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>> later. >>> + * See the COPYING file in the top-level directory. >>> + * >>> + */ >>> + >>> +#include "qemu/osdep.h" >>> +#include "qemu-common.h" >>> + >>> +#include "hw/remote/iommu.h" >>> +#include "hw/pci/pci_bus.h" >>> +#include "hw/pci/pci.h" >>> +#include "exec/memory.h" >>> +#include "exec/address-spaces.h" >>> +#include "trace.h" >>> + >>> +struct RemoteIommuElem { >>> + AddressSpace as; >>> + MemoryRegion mr; >>> +}; >>> + >>> +struct RemoteIommuTable { >>> + QemuMutex lock; >>> + GHashTable *elem_by_bdf; >>> +} remote_iommu_table; >>> + >>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i) >>> + >>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, >>> + void *opaque, int devfn) >>> +{ >>> + struct RemoteIommuTable *iommu_table = opaque; >>> + struct RemoteIommuElem *elem = NULL; >>> + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_bus), devfn); >>> + >>> + elem = g_hash_table_lookup(iommu_table->elem_by_bdf, >>> INT2VOIDP(pci_bdf)); >>> + >>> + if (!elem) { >>> + g_autofree char *mr_name = g_strdup_printf("vfu-ram-%d", pci_bdf); >>> + g_autofree char *as_name = g_strdup_printf("vfu-as-%d", pci_bdf); >>> + >>> + elem = g_malloc0(sizeof(struct RemoteIommuElem)); >>> + >>> + memory_region_init(&elem->mr, NULL, mr_name, UINT64_MAX); >> goes here: >> memory_region_do_init() >> if (!owner) { >> >> owner = container_get(qdev_get_machine(), "/unattached"); >> >> } >> >> then >> >>> + address_space_init(&elem->as, &elem->mr, as_name); >>> + >>> + qemu_mutex_lock(&iommu_table->lock); >>> + g_hash_table_insert(iommu_table->elem_by_bdf, INT2VOIDP(pci_bdf), >>> elem); >>> + qemu_mutex_unlock(&iommu_table->lock); >>> + } >>> + >>> + return &elem->as; >>> +} >>> + >>> +static void remote_iommu_del_elem(gpointer data) >>> +{ >>> + struct RemoteIommuElem *elem = data; >>> + >>> + g_assert(elem); >>> + >>> + memory_region_unref(&elem->mr); >> >> here we call >> object_unref(mr->owner); >> leaving dangling pointer in owner '(qdev_get_machine(), "/unattached")' >> it doesn't look correct >> >> I thought that memory_region_unref() should be always paired with >> memory_region_ref() >> >> and looking at memory_region_init(...owner...) history it looks like >> owner-less (NULL) regions are not meant to be deleted ever. > > Hi Igor, > > Thanks for the pointers about ref counters for MemoryRegions. > > It makes sense - MemoryRegions are not QEMU Objects. So their > owner’s ref counters are used instead. So the expectation is that > when the owner is destroyed, the MemoryRegions initialized by them > also get destroyed simultaneously.
Well, MemoryRegions are indeed QEMU objects - "memory_region_init() -> object_initialize()" initializes the object. So we should be able to unref the MemoryRegion object directly. We could make the PCIDevice as the owner of its IOMMU region - when the device is finalized, its region would be finalized as well. Given the above, I don’t think we would need a separate delete function (such as remote_iommu_del_device()). When the device is unplugged, its MemoryRegion would be finalized automatically. Thank you! -- Jag > > In this case, RemoteIommuElem->mr does not have an owner. Therefore, > we don’t have to manage its ref counter. When RemoteIommuElem is > destroyed, the MemoryRegion should be cleaned up automatically. > > -- > Jag > >> >>> + address_space_destroy(&elem->as); >>> + >>> + g_free(elem); >>> +} >>> + >>> +void remote_iommu_del_device(PCIDevice *pci_dev) >>> +{ >>> + int pci_bdf; >>> + >>> + if (!remote_iommu_table.elem_by_bdf || !pci_dev) { >>> + return; >>> + } >>> + >>> + pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), >>> pci_dev->devfn); >>> + >>> + qemu_mutex_lock(&remote_iommu_table.lock); >>> + g_hash_table_remove(remote_iommu_table.elem_by_bdf, >>> INT2VOIDP(pci_bdf)); >>> + qemu_mutex_unlock(&remote_iommu_table.lock); >>> +} >>> + >>> +void remote_configure_iommu(PCIBus *pci_bus) >>> +{ >>> + if (!remote_iommu_table.elem_by_bdf) { >>> + remote_iommu_table.elem_by_bdf = >>> + g_hash_table_new_full(NULL, NULL, NULL, remote_iommu_del_elem); >>> + qemu_mutex_init(&remote_iommu_table.lock); >>> + } >>> + >>> + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, >>> &remote_iommu_table); >>> +} >>> diff --git a/MAINTAINERS b/MAINTAINERS >>> index e7b0297a63..21694a9698 100644 >>> --- a/MAINTAINERS >>> +++ b/MAINTAINERS >>> @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c >>> F: include/hw/remote/iohub.h >>> F: subprojects/libvfio-user >>> F: hw/remote/vfio-user-obj.c >>> +F: hw/remote/iommu.c >>> +F: include/hw/remote/iommu.h >>> >>> EBPF: >>> M: Jason Wang <jasow...@redhat.com> >>> diff --git a/hw/remote/meson.build b/hw/remote/meson.build >>> index 534ac5df79..bcef83c8cc 100644 >>> --- a/hw/remote/meson.build >>> +++ b/hw/remote/meson.build >>> @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: >>> files('message.c')) >>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) >>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) >>> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) >>> +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) >>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: >>> files('vfio-user-obj.c')) >>> >>> remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)