On 3/9/26 07:08, Mykyta Poturai wrote:
> There is no need to store ranges for each PCI device, as they are only
> used during the mapping/unmapping process and can be reused for each
> device. This also allows to avoid the need to allocate and destroy
> rangesets for each device.
>
Thank you for this.
Consider adding this tag:
Amends: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> Signed-off-by: Mykyta Poturai <[email protected]>
> ---
> v1->v2:
> * new patch
> ---
> xen/common/domain.c | 24 ++++++++++++++
> xen/drivers/vpci/header.c | 66 ++++++++++++++-------------------------
> xen/drivers/vpci/vpci.c | 3 --
> xen/include/xen/vpci.h | 4 ++-
> 4 files changed, 51 insertions(+), 46 deletions(-)
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index e06174fca7..76b0163616 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -454,6 +454,14 @@ static int vcpu_teardown(struct vcpu *v)
> */
> static void vcpu_destroy(struct vcpu *v)
> {
> +#ifdef CONFIG_HAS_VPCI
> + int i;
> +
> + for ( i = 0; i < ARRAY_SIZE(v->vpci.bar_mem); i++ )
> + if ( v->vpci.bar_mem[i] )
> + rangeset_destroy(v->vpci.bar_mem[i]);
Paraphrasing some previous feedback from [1], you might additionally want:
v->vpci.bar_mem[i] = NULL;
or introduce a RANGESET_DESTROY() macro which, similar to XFREE(), also sets
the pointer to NULL after destroying the rangeset.
[1]
https://lore.kernel.org/xen-devel/[email protected]/T/#t
> +
> +#endif
> free_vcpu_struct(v);
> }
>
> @@ -511,6 +519,22 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int
> vcpu_id)
> if ( arch_vcpu_create(v) != 0 )
> goto fail_sched;
>
> +#ifdef CONFIG_HAS_VPCI
Would it make sense to introduce vpci_* helper functions instead of open-coding
this under #ifdef? Same for the deallocation above.
> + {
I'm not sure it's necessary to start a new block here.
> + int i;
> +
> + for ( i = 0; i < ARRAY_SIZE(v->vpci.bar_mem); i++ )
> + {
> + char str[32];
> +
> + snprintf(str, sizeof(str), "%pv:BAR%u", v, i);
> + v->vpci.bar_mem[i] = rangeset_new(d, str, RANGESETF_no_print);
This seems to be performing an allocation for vPCI even for domains that don't
have vPCI.
> + if ( !v->vpci.bar_mem[i] )
> + goto fail_sched;
> + }
> + }
> +#endif
> +
> d->vcpu[vcpu_id] = v;
> if ( vcpu_id != 0 )
> {
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 07ec991a12..cb64d9b9fc 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -195,6 +195,7 @@ bool vpci_process_pending(struct vcpu *v)
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = v->vpci.bar_mem[i];
> struct map_data data = {
> .d = v->domain,
> .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
> @@ -202,10 +203,10 @@ bool vpci_process_pending(struct vcpu *v)
> };
> int rc;
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> + rc = rangeset_consume_ranges(mem, map_range, &data);
>
> if ( rc == -ERESTART )
> {
> @@ -223,8 +224,8 @@ bool vpci_process_pending(struct vcpu *v)
>
> /* Clean all the rangesets */
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> - if ( !rangeset_is_empty(header->bars[i].mem) )
> - rangeset_purge(header->bars[i].mem);
> + if ( !rangeset_is_empty(v->vpci.bar_mem[i]) )
> + rangeset_purge(v->vpci.bar_mem[i]);
>
> v->vpci.pdev = NULL;
>
> @@ -259,13 +260,14 @@ static int __init apply_map(struct domain *d, const
> struct pci_dev *pdev,
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = current->vpci.bar_mem[i];
> struct map_data data = { .d = d, .map = true, .bar = bar };
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
> - &data)) == -ERESTART )
> + while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) ==
> + -ERESTART )
> {
> /*
> * It's safe to drop and reacquire the lock in this context
> @@ -330,12 +332,13 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> {
> struct vpci_bar *bar = &header->bars[i];
> + struct rangeset *mem = current->vpci.bar_mem[i];
> unsigned long start = PFN_DOWN(bar->addr);
> unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
> unsigned long start_guest = PFN_DOWN(bar->guest_addr);
> unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
>
> - if ( !bar->mem )
> + if ( !mem )
> continue;
>
> if ( !MAPPABLE_BAR(bar) ||
> @@ -353,7 +356,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> continue;
> }
>
> - ASSERT(rangeset_is_empty(bar->mem));
> + ASSERT(rangeset_is_empty(mem));
>
> /*
> * Make sure that the guest set address has the same page offset
> @@ -368,7 +371,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> return -EINVAL;
> }
>
> - rc = rangeset_add_range(bar->mem, start_guest, end_guest);
> + rc = rangeset_add_range(mem, start_guest, end_guest);
> if ( rc )
> {
> printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
> @@ -379,12 +382,12 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> /* Check for overlap with the already setup BAR ranges. */
> for ( j = 0; j < i; j++ )
> {
> - struct vpci_bar *prev_bar = &header->bars[j];
> + struct rangeset *prev_mem = current->vpci.bar_mem[j];
>
> - if ( rangeset_is_empty(prev_bar->mem) )
> + if ( rangeset_is_empty(prev_mem) )
> continue;
>
> - rc = rangeset_remove_range(prev_bar->mem, start_guest,
> end_guest);
> + rc = rangeset_remove_range(prev_mem, start_guest, end_guest);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -394,7 +397,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> }
> }
>
> - rc = pci_sanitize_bar_memory(bar->mem);
> + rc = pci_sanitize_bar_memory(mem);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -411,14 +414,14 @@ int vpci_modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
> vmsix_table_size(pdev->vpci, i) - 1);
>
> - for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
> + for ( j = 0; j < ARRAY_SIZE(current->vpci.bar_mem); j++ )
> {
> - const struct vpci_bar *bar = &header->bars[j];
> + struct rangeset *mem = current->vpci.bar_mem[j];
>
> - if ( rangeset_is_empty(bar->mem) )
> + if ( rangeset_is_empty(mem) )
> continue;
>
> - rc = rangeset_remove_range(bar->mem, start, end);
> + rc = rangeset_remove_range(mem, start, end);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -468,8 +471,9 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
> {
> const struct vpci_bar *bar = &header->bars[j];
> + struct rangeset *mem = current->vpci.bar_mem[j];
>
> - if ( !rangeset_overlaps_range(bar->mem, start, end) ||
> + if ( !rangeset_overlaps_range(mem, start, end) ||
> /*
> * If only the ROM enable bit is toggled check
> against
> * other BARs in the same device for overlaps, but
> not
> @@ -480,7 +484,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t
> cmd, bool rom_only)
> bar->type == VPCI_BAR_ROM) )
> continue;
>
> - rc = rangeset_remove_range(bar->mem, start, end);
> + rc = rangeset_remove_range(mem, start, end);
> if ( rc )
> {
> gprintk(XENLOG_WARNING,
> @@ -733,18 +737,6 @@ static void cf_check rom_write(
> }
> }
>
> -static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar,
> - unsigned int i)
> -{
> - char str[32];
> -
> - snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
> -
> - bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
> -
> - return !bar->mem ? -ENOMEM : 0;
> -}
> -
> static int vpci_init_capability_list(struct pci_dev *pdev)
> {
> int rc;
> @@ -989,10 +981,6 @@ int vpci_init_header(struct pci_dev *pdev)
> else
> bars[i].type = VPCI_BAR_MEM32;
>
> - rc = bar_add_rangeset(pdev, &bars[i], i);
> - if ( rc )
> - goto fail;
> -
> rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
> (i == num_bars - 1) ? PCI_BAR_LAST : 0);
> if ( rc < 0 )
> @@ -1046,12 +1034,6 @@ int vpci_init_header(struct pci_dev *pdev)
> 4, rom);
> if ( rc )
> rom->type = VPCI_BAR_EMPTY;
> - else
> - {
> - rc = bar_add_rangeset(pdev, rom, num_bars);
> - if ( rc )
> - goto fail;
> - }
> }
> else if ( !is_hwdom )
> {
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index f66f50c8ba..af61b521b0 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -357,9 +357,6 @@ void vpci_deassign_device(struct pci_dev *pdev)
> }
> spin_unlock(&pdev->vpci->lock);
>
> - for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
> - rangeset_destroy(pdev->vpci->header.bars[i].mem);
> -
> xfree(pdev->vpci);
> pdev->vpci = NULL;
> }
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index dd233b8b03..fa654545e5 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -118,7 +118,6 @@ struct vpci {
> uint64_t guest_addr;
> uint64_t size;
> uint64_t resizable_sizes;
> - struct rangeset *mem;
> enum {
> VPCI_BAR_EMPTY,
> VPCI_BAR_IO,
> @@ -212,6 +211,9 @@ struct vpci {
> struct vpci_vcpu {
> /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
> const struct pci_dev *pdev;
> +#ifdef __XEN__
Why not enclose the whole vpci_vcpu struct inside __XEN__?
> + struct rangeset *bar_mem[PCI_HEADER_NORMAL_NR_BARS + 1];
Maybe add a brief comment explaining the "+ 1"? (Presumably it is for the ROM BAR.)
> +#endif
> uint16_t cmd;
> bool rom_only : 1;
> };