>-----Original Message-----
>From: Steven Sistare <steven.sist...@oracle.com>
>Subject: Re: [PATCH V3 36/42] vfio/iommufd: preserve descriptors
>
>On 5/16/2025 6:06 AM, Duan, Zhenzhong wrote:
>>> -----Original Message-----
>>> From: Steve Sistare <steven.sist...@oracle.com>
>>> Subject: [PATCH V3 36/42] vfio/iommufd: preserve descriptors
>>>
>>> Save the iommu and vfio device fd in CPR state when it is created.
>>> After CPR, the fd number is found in CPR state and reused. Remember
>>> the reused status for subsequent patches. The reused status is cleared
>>> when vmstate load finishes.
>>>
>>> Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
>>> ---
>>> backends/iommufd.c | 19 ++++++++++---------
>>> hw/vfio/cpr-iommufd.c | 16 ++++++++++++++++
>>> hw/vfio/device.c | 10 ++--------
>>> hw/vfio/iommufd.c | 13 +++++++++++--
>>> include/system/iommufd.h | 1 +
>>> 5 files changed, 40 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>>> index 6fed1c1..492747c 100644
>>> --- a/backends/iommufd.c
>>> +++ b/backends/iommufd.c
>>> @@ -16,12 +16,18 @@
>>> #include "qemu/module.h"
>>> #include "qom/object_interfaces.h"
>>> #include "qemu/error-report.h"
>>> +#include "migration/cpr.h"
>>> #include "monitor/monitor.h"
>>> #include "trace.h"
>>> #include "hw/vfio/vfio-device.h"
>>> #include <sys/ioctl.h>
>>> #include <linux/iommufd.h>
>>>
>>> +static const char *iommufd_fd_name(IOMMUFDBackend *be)
>>> +{
>>> + return object_get_canonical_path_component(OBJECT(be));
>>> +}
>>> +
>>> static void iommufd_backend_init(Object *obj)
>>> {
>>> IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
>>> @@ -47,9 +53,8 @@ static void iommufd_backend_set_fd(Object *obj, const
>>> char *str, Error **errp)
>>> IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
>>> int fd = -1;
>>>
>>> - fd = monitor_fd_param(monitor_cur(), str, errp);
>>> + fd = cpr_get_fd_param(iommufd_fd_name(be), str, 0, &be->cpr_reused,
>errp);
>>> if (fd == -1) {
>>> - error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> return;
>>> }
>>> be->fd = fd;
>>> @@ -95,14 +100,9 @@ bool iommufd_change_process(IOMMUFDBackend
>*be,
>>> Error **errp)
>>>
>>> bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
>>> {
>>> - int fd;
>>> -
>>> if (be->owned && !be->users) {
>>> - fd = qemu_open("/dev/iommu", O_RDWR, errp);
>>> - if (fd < 0) {
>>> - return false;
>>> - }
>>> - be->fd = fd;
>>> + be->fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be),
>0,
>>> + &be->cpr_reused, errp);
>>
>> Need to check error before assign to be->fd.
>
>will do.
>
>>> }
>>> be->users++;
>>>
>>> @@ -121,6 +121,7 @@ void
>iommufd_backend_disconnect(IOMMUFDBackend
>>> *be)
>>> be->fd = -1;
>>> }
>>> out:
>>> + cpr_delete_fd(iommufd_fd_name(be), 0);
>>> trace_iommufd_backend_disconnect(be->fd, be->users);
>>> }
>>>
>>> diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
>>> index 46f2006..b760bd3 100644
>>> --- a/hw/vfio/cpr-iommufd.c
>>> +++ b/hw/vfio/cpr-iommufd.c
>>> @@ -8,6 +8,7 @@
>>> #include "qemu/osdep.h"
>>> #include "qapi/error.h"
>>> #include "hw/vfio/vfio-cpr.h"
>>> +#include "hw/vfio/vfio-device.h"
>>> #include "migration/blocker.h"
>>> #include "migration/cpr.h"
>>> #include "migration/migration.h"
>>> @@ -25,10 +26,25 @@ static bool
>vfio_cpr_supported(VFIOIOMMUFDContainer
>>> *container, Error **errp)
>>> return true;
>>> }
>>>
>>> +static int vfio_container_post_load(void *opaque, int version_id)
>>> +{
>>> + VFIOIOMMUFDContainer *container = opaque;
>>> + VFIOContainerBase *bcontainer = &container->bcontainer;
>>> + VFIODevice *vbasedev;
>>> +
>>> + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
>>> + vbasedev->cpr.reused = false;
>>> + }
>>> + container->be->cpr_reused = false;
>>
>> It's strange to set iommufd and vfio device's reused in container's post
>> load,
>> Maybe better to do it in their own post load handler?
>
>vfio_container_post_load has MIG_PRI_LOW so it is called last, which guarantees
>that be->cpr_reused remains true while all devices are loaded. This is
>required
>so that we supress dma_map calls during device load processing:
>
> iommufd_backend_map_file_dma()
> if (be->cpr_reused)
> return 0;
>
>"vbasedev->cpr.reused = false" could be moved to vfio_device_post_load.
>I put it here to be future proof -- al reused flags are cleared together,
>at the end of post_load, and to be consistent with cpr-
>legacy.c:vfio_container_post_load
OK
>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static const VMStateDescription vfio_container_vmstate = {
>>> .name = "vfio-iommufd-container",
>>> .version_id = 0,
>>> .minimum_version_id = 0,
>>> + .post_load = vfio_container_post_load,
>>> .needed = cpr_needed_for_reuse,
>>> .fields = (VMStateField[]) {
>>> VMSTATE_END_OF_LIST()
>>> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
>>> index 8e9de68..02f384e 100644
>>> --- a/hw/vfio/device.c
>>> +++ b/hw/vfio/device.c
>>> @@ -312,14 +312,8 @@ bool vfio_device_get_name(VFIODevice *vbasedev,
>>> Error **errp)
>>>
>>> void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
>>> {
>>> - ERRP_GUARD();
>>> - int fd = monitor_fd_param(monitor_cur(), str, errp);
>>> -
>>> - if (fd < 0) {
>>> - error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> - return;
>>> - }
>>> - vbasedev->fd = fd;
>>> + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0,
>>> + &vbasedev->cpr.reused, errp);
>>
>> Same here.
>
>Do you mean, "need to check error"?
>If so, no need. The new function definition is:
>
>void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
>{
> vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0,
> &vbasedev->cpr.reused, errp);
>}
>
>cpr_get_fd_param() returns -1 on error and sets errp.
OK.
Zhenzhong