From: Dan Williams <dan.j.willi...@intel.com> Sent: Wednesday, July 16, 2025 
9:09 AM
> 
> The ability to emulate a host bridge is useful not only for hardware PCI
> controllers like CONFIG_VMD, or virtual PCI controllers like
> CONFIG_PCI_HYPERV, but also for test and development scenarios like
> CONFIG_SAMPLES_DEVSEC [1].
> 
> One stumbling block for defining CONFIG_SAMPLES_DEVSEC, a sample
> implementation of a platform TSM for PCI Device Security, is the need to
> accommodate PCI_DOMAINS_GENERIC architectures alongside x86 [2].
> 
> In support of supplementing the existing CONFIG_PCI_BRIDGE_EMUL
> infrastructure for host bridges:
> 
> * Introduce pci_bus_find_emul_domain_nr() as a common way to find a free
>   PCI domain number whether that is to reuse the existing dynamic
>   allocation code in the !ACPI case, or to assign an unused domain above
>   the last ACPI segment.
> 
> * Convert pci-hyperv to the new allocator so that the PCI core can
>   unconditionally assume that bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET
>   is the dynamically allocated case.
> 
> A follow on patch can also convert vmd to the new scheme. Currently vmd
> is limited to CONFIG_PCI_DOMAINS_GENERIC=n (x86) so, unlike pci-hyperv,
> it does not immediately conflict with this new
> pci_bus_find_emul_domain_nr() mechanism.
> 
> Link: 
> https://lore.kernel.org/all/174107249038.1288555.12362100502109498455.st...@dwillia2-xfh.jf.intel.com/
>  [1]
> Reported-by: Suzuki K Poulose <suzuki.poul...@arm.com>
> Closes: 
> https://lore.kernel.org/all/20250311144601.145736-3-suzuki.poul...@arm.com/ 
> Cc: Lorenzo Pieralisi <lpieral...@kernel.org>
> Cc: Manivannan Sadhasivam <manivannan.sadhasi...@linaro.org>
> Cc: Rob Herring <r...@kernel.org>
> Cc: Bjorn Helgaas <bhelg...@google.com>
> Cc: "K. Y. Srinivasan" <k...@microsoft.com>
> Cc: Haiyang Zhang <haiya...@microsoft.com>
> Cc: Wei Liu <wei....@kernel.org>
> Cc: Dexuan Cui <de...@microsoft.com>
> Tested-by: Suzuki K Poulose <suzuki.poul...@arm.com>
> Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 53 ++---------------------------
>  drivers/pci/pci.c                   | 43 ++++++++++++++++++++++-
>  drivers/pci/probe.c                 |  8 ++++-
>  include/linux/pci.h                 |  4 +++
>  4 files changed, 56 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c 
> b/drivers/pci/controller/pci-hyperv.c
> index ef5d655a0052..cfe9806bdbe4 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -3630,48 +3630,6 @@ static int hv_send_resources_released(struct hv_device 
> *hdev)
>       return 0;
>  }
> 
> -#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> -static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> -
> -/*
> - * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
> - * as invalid for passthrough PCI devices of this driver.
> - */
> -#define HVPCI_DOM_INVALID 0
> -
> -/**
> - * hv_get_dom_num() - Get a valid PCI domain number
> - * Check if the PCI domain number is in use, and return another number if
> - * it is in use.
> - *
> - * @dom: Requested domain number
> - *
> - * return: domain number on success, HVPCI_DOM_INVALID on failure
> - */
> -static u16 hv_get_dom_num(u16 dom)
> -{
> -     unsigned int i;
> -
> -     if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> -             return dom;
> -
> -     for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> -             if (test_and_set_bit(i, hvpci_dom_map) == 0)
> -                     return i;
> -     }
> -
> -     return HVPCI_DOM_INVALID;
> -}
> -
> -/**
> - * hv_put_dom_num() - Mark the PCI domain number as free
> - * @dom: Domain number to be freed
> - */
> -static void hv_put_dom_num(u16 dom)
> -{
> -     clear_bit(dom, hvpci_dom_map);
> -}
> -
>  /**
>   * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
>   * @hdev:    VMBus's tracking struct for this root PCI bus
> @@ -3715,9 +3673,9 @@ static int hv_pci_probe(struct hv_device *hdev,
>        * collisions) in the same VM.
>        */
>       dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
> -     dom = hv_get_dom_num(dom_req);
> +     dom = pci_bus_find_emul_domain_nr(dom_req);
> 
> -     if (dom == HVPCI_DOM_INVALID) {
> +     if (dom < 0) {
>               dev_err(&hdev->device,
>                       "Unable to use dom# 0x%x or other numbers", dom_req);
>               ret = -EINVAL;
> @@ -3851,7 +3809,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  destroy_wq:
>       destroy_workqueue(hbus->wq);
>  free_dom:
> -     hv_put_dom_num(hbus->bridge->domain_nr);
> +     pci_bus_release_emul_domain_nr(hbus->bridge->domain_nr);
>  free_bus:
>       kfree(hbus);
>       return ret;
> @@ -3976,8 +3934,6 @@ static void hv_pci_remove(struct hv_device *hdev)
>       irq_domain_remove(hbus->irq_domain);
>       irq_domain_free_fwnode(hbus->fwnode);
> 
> -     hv_put_dom_num(hbus->bridge->domain_nr);
> -
>       kfree(hbus);
>  }
> 
> @@ -4148,9 +4104,6 @@ static int __init init_hv_pci_drv(void)
>       if (ret)
>               return ret;
> 
> -     /* Set the invalid domain number's bit, so it will not be used */
> -     set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
> -
>       /* Initialize PCI block r/w interface */
>       hvpci_block_ops.read_block = hv_read_config_block;
>       hvpci_block_ops.write_block = hv_write_config_block;
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index e9448d55113b..833ebf2d5213 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6692,9 +6692,50 @@ static void pci_no_domains(void)
>  #endif
>  }
> 
> +#ifdef CONFIG_PCI_DOMAINS
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
> +
> +/*
> + * Find a free domain_nr either allocated by pci_domain_nr_dynamic_ida or
> + * fallback to the first free domain number above the last ACPI segment 
> number.
> + * Caller may have a specific domain number in mind, in which case try to
> + * reserve it.
> + *
> + * Note that this allocation is freed by pci_release_host_bridge_dev().
> + */
> +int pci_bus_find_emul_domain_nr(int hint)
> +{
> +     if (hint >= 0) {
> +             hint = ida_alloc_range(&pci_domain_nr_dynamic_ida, hint, hint,
> +                                    GFP_KERNEL);

This almost preserves the existing functionality in pci-hyperv.c. But if the
"hint" passed in is zero, current code in pci-hyperv.c treats that as a
collision and allocates some other value. The special treatment of zero is
necessary per the comment with the definition of HVPCI_DOM_INVALID.

I don't have an opinion on whether the code here should treat a "hint"
of zero as invalid, or whether that should be handled in pci-hyperv.c.

> +
> +             if (hint >= 0)
> +                     return hint;
> +     }
> +
> +     if (acpi_disabled)
> +             return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
> +
> +     /*
> +      * Emulated domains start at 0x10000 to not clash with ACPI _SEG
> +      * domains.  Per ACPI r6.0, sec 6.5.6,  _SEG returns an integer, of
> +      * which the lower 16 bits are the PCI Segment Group (domain) number.
> +      * Other bits are currently reserved.
> +      */

Back in 2018 and 2019, the Microsoft Linux team encountered problems with
PCI domain IDs that exceeded 0xFFFF. User space code, such as the Xorg X server,
assumed PCI domain IDs were at most 16 bits, and retained only the low 16 bits
if the value was larger. My memory of the details is vague, but I believe some
or all of this behavior was tied to libpciaccess. As a result of these user 
space
limitations, the pci-hyperv.c code made sure to not create any domain IDs
larger than 0xFFFF. The problem was not just theoretical -- Microsoft had
customers reporting issues due to the "32-bit domain ID problem" and the
pci-hyperv.c code was updated to avoid it.

I don't have information on whether user space code has been fixed, or
the extent to which such a fix has propagated into distro versions. At the
least, a VM with old user space code might break if the kernel is upgraded
to one with this patch. How do people see the risks now that it is 6 years
later? I don't have enough data to make an assessment.

Michael

> +     return ida_alloc_range(&pci_domain_nr_dynamic_ida, 0x10000, INT_MAX,
> +                            GFP_KERNEL);
> +}
> +EXPORT_SYMBOL_GPL(pci_bus_find_emul_domain_nr);
> +
> +void pci_bus_release_emul_domain_nr(int domain_nr)
> +{
> +     ida_free(&pci_domain_nr_dynamic_ida, domain_nr);
> +}
> +EXPORT_SYMBOL_GPL(pci_bus_release_emul_domain_nr);
> +#endif
> +
>  #ifdef CONFIG_PCI_DOMAINS_GENERIC
>  static DEFINE_IDA(pci_domain_nr_static_ida);
> -static DEFINE_IDA(pci_domain_nr_dynamic_ida);
> 
>  static void of_pci_reserve_static_domain_nr(void)
>  {
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 4b8693ec9e4c..e94978c3be3d 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -632,6 +632,11 @@ static void pci_release_host_bridge_dev(struct device 
> *dev)
> 
>       pci_free_resource_list(&bridge->windows);
>       pci_free_resource_list(&bridge->dma_ranges);
> +
> +     /* Host bridges only have domain_nr set in the emulation case */
> +     if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
> +             pci_bus_release_emul_domain_nr(bridge->domain_nr);
> +
>       kfree(bridge);
>  }
> 
> @@ -1112,7 +1117,8 @@ static int pci_register_host_bridge(struct 
> pci_host_bridge *bridge)
>       device_del(&bridge->dev);
>  free:
>  #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -     pci_bus_release_domain_nr(parent, bus->domain_nr);
> +     if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> +             pci_bus_release_domain_nr(parent, bus->domain_nr);
>  #endif
>       if (bus_registered)
>               put_device(&bus->dev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 05e68f35f392..f6a713da5c49 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1934,10 +1934,14 @@ DEFINE_GUARD(pci_dev, struct pci_dev *,
> pci_dev_lock(_T), pci_dev_unlock(_T))
>   */
>  #ifdef CONFIG_PCI_DOMAINS
>  extern int pci_domains_supported;
> +int pci_bus_find_emul_domain_nr(int hint);
> +void pci_bus_release_emul_domain_nr(int domain_nr);
>  #else
>  enum { pci_domains_supported = 0 };
>  static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
>  static inline int pci_proc_domain(struct pci_bus *bus) { return 0; }
> +static inline int pci_bus_find_emul_domain_nr(int hint) { return 0; }
> +static inline void pci_bus_release_emul_domain_nr(int domain_nr) { }
>  #endif /* CONFIG_PCI_DOMAINS */
> 
>  /*
> --
> 2.50.1
> 


Reply via email to