> From: Winiarski, Michal <[email protected]>
> Sent: Wednesday, October 22, 2025 6:42 AM
> +
> +/**
> + * struct xe_vfio_pci_migration_file - file used for reading / writing 
> migration
> data
> + */

Let's use the comment style used elsewhere in vfio, i.e. "/*" instead of "/**", since these are not kernel-doc comments.

> +struct xe_vfio_pci_migration_file {
> +     /** @filp: pointer to underlying &struct file */
> +     struct file *filp;
> +     /** @lock: serializes accesses to migration data */
> +     struct mutex lock;
> +     /** @xe_vdev: backpointer to &struct xe_vfio_pci_core_device */
> +     struct xe_vfio_pci_core_device *xe_vdev;

The comments above are obvious and add no information — please drop them.

> +struct xe_vfio_pci_core_device {
> +     /** @core_device: vendor-agnostic VFIO device */
> +     struct vfio_pci_core_device core_device;
> +
> +     /** @mig_state: current device migration state */
> +     enum vfio_device_mig_state mig_state;
> +
> +     /** @vfid: VF number used by PF, xe uses 1-based indexing for vfid
> */
> +     unsigned int vfid;

Is the 1-based indexing a software convention or a hardware requirement?

> +
> +     /** @pf: pointer to driver_private of physical function */
> +     struct pci_dev *pf;
> +
> +     /** @fd: &struct xe_vfio_pci_migration_file for userspace to
> read/write migration data */
> +     struct xe_vfio_pci_migration_file *fd;

s/fd/migf/, as 'fd' is an integer in all other places.

Btw, it's risky without a lock protecting the state transition. See the usage of
state_mutex in other migration drivers.

> +static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
> +{
> +     struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
> +     int ret;
> +
> +     ret = xe_sriov_vfio_wait_flr_done(xe_vdev->pf, xe_vdev->vfid);
> +     if (ret)
> +             dev_err(&pdev->dev, "Failed to wait for FLR: %d\n", ret);

Why is there a device-specific wait for FLR completion? I suppose it's already
covered by the PCI core...

> +
> +     xe_vfio_pci_reset(xe_vdev);
> +}
> +
> +static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
> +     .reset_done = xe_vfio_pci_reset_done,
> +};

Missing an ".error_detected" handler.

> +static struct xe_vfio_pci_migration_file *
> +xe_vfio_pci_alloc_file(struct xe_vfio_pci_core_device *xe_vdev,
> +                    enum xe_vfio_pci_file_type type)
> +{
> +     struct xe_vfio_pci_migration_file *migf;
> +     const struct file_operations *fops;
> +     int flags;
> +
> +     migf = kzalloc(sizeof(*migf), GFP_KERNEL);
> +     if (!migf)
> +             return ERR_PTR(-ENOMEM);
> +
> +     fops = type == XE_VFIO_FILE_SAVE ? &xe_vfio_pci_save_fops :
> &xe_vfio_pci_resume_fops;
> +     flags = type == XE_VFIO_FILE_SAVE ? O_RDONLY : O_WRONLY;
> +     migf->filp = anon_inode_getfile("xe_vfio_mig", fops, migf, flags);
> +     if (IS_ERR(migf->filp)) {
> +             kfree(migf);
> +             return ERR_CAST(migf->filp);
> +     }
> +
> +     mutex_init(&migf->lock);
> +     migf->xe_vdev = xe_vdev;
> +     xe_vdev->fd = migf;
> +
> +     stream_open(migf->filp->f_inode, migf->filp);
> +
> +     return migf;

Missing a get_file(). The vfio core will do another fput() upon error.

See vfio_ioct_mig_return_fd().

> +}
> +
> +static struct file *
> +xe_vfio_set_state(struct xe_vfio_pci_core_device *xe_vdev, u32 new)
> +{
> +     u32 cur = xe_vdev->mig_state;
> +     int ret;
> +
> +     dev_dbg(xe_vdev_to_dev(xe_vdev),
> +             "state: %s->%s\n", vfio_dev_state_str(cur),
> vfio_dev_state_str(new));
> +
> +     /*
> +      * "STOP" handling is reused for "RUNNING_P2P", as the device
> doesn't have the capability to
> +      * selectively block p2p DMA transfers.
> +      * The device is not processing new workload requests when the VF is
> stopped, and both
> +      * memory and MMIO communication channels are transferred to
> destination (where processing
> +      * will be resumed).
> +      */
> +     if ((cur == VFIO_DEVICE_STATE_RUNNING && new ==
> VFIO_DEVICE_STATE_STOP) ||

This is not required when P2P is supported: vfio_mig_get_next_state() will
find the right arc from RUNNING through RUNNING_P2P to STOP, so the direct
RUNNING -> STOP transition is never requested.

> +         (cur == VFIO_DEVICE_STATE_RUNNING && new ==
> VFIO_DEVICE_STATE_RUNNING_P2P)) {
> +             ret = xe_sriov_vfio_stop(xe_vdev->pf, xe_vdev->vfid);
> +             if (ret)
> +                     goto err;
> +
> +             return NULL;
> +     }

Better to align the naming with other drivers: s/stop/suspend/ and s/run/resume/.

> +
> +     if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new ==
> VFIO_DEVICE_STATE_STOP) ||
> +         (cur == VFIO_DEVICE_STATE_STOP && new ==
> VFIO_DEVICE_STATE_RUNNING_P2P))
> +             return NULL;
> +
> +     if ((cur == VFIO_DEVICE_STATE_STOP && new ==
> VFIO_DEVICE_STATE_RUNNING) ||
> +         (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new ==
> VFIO_DEVICE_STATE_RUNNING)) {
> +             ret = xe_sriov_vfio_run(xe_vdev->pf, xe_vdev->vfid);
> +             if (ret)
> +                     goto err;
> +
> +             return NULL;
> +     }
> +
> +     if (cur == VFIO_DEVICE_STATE_STOP && new ==
> VFIO_DEVICE_STATE_STOP_COPY) {
> +             struct xe_vfio_pci_migration_file *migf;
> +
> +             migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_SAVE);
> +             if (IS_ERR(migf)) {
> +                     ret = PTR_ERR(migf);
> +                     goto err;
> +             }
> +
> +             ret = xe_sriov_vfio_stop_copy_enter(xe_vdev->pf, xe_vdev-
> >vfid);
> +             if (ret) {
> +                     fput(migf->filp);
> +                     goto err;
> +             }
> +
> +             return migf->filp;
> +     }
> +
> +     if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new ==
> VFIO_DEVICE_STATE_STOP)) {
> +             if (xe_vdev->fd)
> +                     xe_vfio_pci_disable_file(xe_vdev->fd);
> +
> +             xe_sriov_vfio_stop_copy_exit(xe_vdev->pf, xe_vdev->vfid);
> +
> +             return NULL;
> +     }
> +
> +     if (cur == VFIO_DEVICE_STATE_STOP && new ==
> VFIO_DEVICE_STATE_RESUMING) {
> +             struct xe_vfio_pci_migration_file *migf;
> +
> +             migf = xe_vfio_pci_alloc_file(xe_vdev,
> XE_VFIO_FILE_RESUME);
> +             if (IS_ERR(migf)) {
> +                     ret = PTR_ERR(migf);
> +                     goto err;
> +             }
> +
> +             ret = xe_sriov_vfio_resume_enter(xe_vdev->pf, xe_vdev-
> >vfid);
> +             if (ret) {
> +                     fput(migf->filp);
> +                     goto err;
> +             }
> +
> +             return migf->filp;
> +     }
> +
> +     if (cur == VFIO_DEVICE_STATE_RESUMING && new ==
> VFIO_DEVICE_STATE_STOP) {
> +             if (xe_vdev->fd)
> +                     xe_vfio_pci_disable_file(xe_vdev->fd);
> +
> +             xe_sriov_vfio_resume_exit(xe_vdev->pf, xe_vdev->vfid);
> +
> +             return NULL;
> +     }
> +
> +     if (new == VFIO_DEVICE_STATE_ERROR)
> +             xe_sriov_vfio_error(xe_vdev->pf, xe_vdev->vfid);

The ERROR state is never passed to the variant driver — you'll get -EINVAL
from vfio_mig_get_next_state() first — so this is dead code.

If the PF driver needs to be notified, you could check the ret value instead.

> +static void xe_vfio_pci_migration_init(struct vfio_device *core_vdev)
> +{
> +     struct xe_vfio_pci_core_device *xe_vdev =
> +             container_of(core_vdev, struct xe_vfio_pci_core_device,
> core_device.vdev);
> +     struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> +
> +     if (!xe_sriov_vfio_migration_supported(pdev->physfn))
> +             return;
> +
> +     /* vfid starts from 1 for xe */
> +     xe_vdev->vfid = pci_iov_vf_id(pdev) + 1;

pci_iov_vf_id() returns an error if the device is not a VF; the return value should be checked.

> +static int xe_vfio_pci_init_dev(struct vfio_device *core_vdev)
> +{
> +     struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> +
> +     if (pdev->is_virtfn && strcmp(pdev->physfn->dev.driver->name, "xe")
> == 0)
> +             xe_vfio_pci_migration_init(core_vdev);

I don't see the point of checking the driver name here.

> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Intel Corporation");

Please use a real author name in MODULE_AUTHOR(), as other drivers do.

Reply via email to