On 14.12.2023 00:44, Volodymyr Babchuk wrote:
> With CONFIG_HAS_VPCI_GUEST_SUPPORT enabled, hypervisor will assign a
> passed-through PCI device to a guest using virtual/guest SBDF
> number. Right now hypervisor automatically allocates next free
> SBDF. But there are cases mentioned in [1] when user should be able to
> control SBDF assigned to the passed-through device.
>
> To enable this, extend assign_device domctl call with optional
> parameter virtual_sbdf. When this parameter is set to
> XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will act as previously, but it
> will return allocated vSBDF back to the toolstack. Alternatively,
> toolstack might provide desired vSBDF and hypervisor will try to use
> it, if it is free and falls into permitted range.
>
> [1] https://lore.kernel.org/all/[email protected]/
>
> Signed-off-by: Volodymyr Babchuk <[email protected]>
It's been a hile since this was sent, so comments below may have been given
by others already. I'm sorry for the redundancy if so.
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -37,7 +37,7 @@ extern vpci_register_init_t *const __end_vpci_array[];
> #define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
>
> #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> -static int add_virtual_device(struct pci_dev *pdev)
> +static int add_virtual_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
> {
> struct domain *d = pdev->domain;
> unsigned int new_dev_number;
> @@ -57,13 +57,35 @@ static int add_virtual_device(struct pci_dev *pdev)
> &pdev->sbdf);
> return -EOPNOTSUPP;
> }
> - new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
> - VPCI_MAX_VIRT_DEV);
> - if ( new_dev_number == VPCI_MAX_VIRT_DEV )
> - return -ENOSPC;
>
> - __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
> + if ( !vsbdf || vsbdf->sbdf == XEN_DOMCTL_DEV_SDBF_ANY )
> + {
> + new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
> + VPCI_MAX_VIRT_DEV);
> + if ( new_dev_number == VPCI_MAX_VIRT_DEV )
> + return -ENOSPC;
>
> + if ( vsbdf )
> + *vsbdf = PCI_SBDF(0, 0, new_dev_number, 0);
> + }
> + else
> + {
> + if ( vsbdf->seg != 0 || vsbdf->bus != 0 || vsbdf->fn != 0 )
> + {
> + gdprintk(XENLOG_ERR,
> + "vSBDF %pp: segment, bus and function should be 0\n",
> + vsbdf);
> + return -EOPNOTSUPP;
> + }
> + new_dev_number = vsbdf->dev;
> + if ( test_bit(new_dev_number, &d->vpci_dev_assigned_map) )
> + {
> + gdprintk(XENLOG_ERR, "vSBDF %pp already assigned\n", vsbdf);
> + return -EOPNOTSUPP;
> + }
> + }
> +
> + __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
> /*
> * Both segment and bus number are 0:
> * - we emulate a single host bridge for the guest, e.g. segment 0
Please can a blank line remain to live ahead of this comment?
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -504,7 +504,12 @@ struct xen_domctl_sendtrigger {
>
>
> /* Assign a device to a guest. Sets up IOMMU structures. */
> -/* XEN_DOMCTL_assign_device */
> +/* XEN_DOMCTL_assign_device
> + * when assigning a PCI device, it is possible to either request
> + * or provide a virtual SBDF. When virtual_sbdf equals to
> + * XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will return allocated
> + * vSBDF back.
> + */
> /*
> * XEN_DOMCTL_test_assign_device: Pass DOMID_INVALID to find out whether the
> * given device is assigned to any DomU at all. Pass a specific domain ID to
> @@ -528,6 +533,8 @@ struct xen_domctl_assign_device {
> union {
> struct {
> uint32_t machine_sbdf; /* machine PCI ID of assigned device */
> + uint32_t virtual_sbdf; /* IN/OUT virtual SBDF of the device */
> +#define XEN_DOMCTL_DEV_SDBF_ANY 0xFFFFFFFF /* request a free SBDF */
> } pci;
Such a struct change needs to come with an interface version bump, I
guess.
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -33,7 +33,7 @@ typedef int vpci_register_init_t(struct pci_dev *dev);
> __used_section(".data.vpci." p) = x
>
> /* Assign vPCI to device by adding handlers to device. */
> -int __must_check vpci_assign_device(struct pci_dev *pdev);
> +int __must_check vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf);
>
> /* Remove all handlers and free vpci related structures. */
> void vpci_deassign_device(struct pci_dev *pdev);
> @@ -265,7 +265,7 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg,
> unsigned int len,
> #else /* !CONFIG_HAS_VPCI */
> struct vpci_vcpu {};
>
> -static inline int vpci_assign_device(struct pci_dev *pdev)
> +static inline int vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
> {
> return 0;
> }
Can this stub really return "success" without even touching *vsdbf? At
the very least the public header comment saying "hypervisor will return"
isn't quite right if this path is taken. Perhaps when HAS_VPCI=n there
should be a requirement for the caller to pass NULL? Yet even then the
domctl interface wouldn't do what it (currently) promises. So perhaps
you can't really extend an existing domctl here.
Jan