Hello, Some comments, mostly code style, nothing functionnal.
Le 09/02/2026 à 12:36, Julian Vetter a écrit : > x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI > addresses and IO-APIC RTEs only provide an 8-bit destination field. > Without extended destination ID support, Linux limits the maximum usable > APIC ID to 255, refusing to bring up vCPUs beyond that limit. So, > advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf, > signalling that guests may use MSI address bits 11:5 and IO-APIC RTE > bits 55:49 as additional high destination ID bits. This expands the > destination ID from 8 to 15 bits. > > Signed-off-by: Julian Vetter <[email protected]> > --- > xen/arch/x86/cpuid.c | 9 +++++++++ > xen/arch/x86/hvm/irq.c | 11 ++++++++++- > xen/arch/x86/hvm/vioapic.c | 2 +- > xen/arch/x86/hvm/vmsi.c | 4 ++-- > xen/arch/x86/include/asm/hvm/hvm.h | 4 ++-- > xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++ > xen/arch/x86/include/asm/msi.h | 3 +++ > 7 files changed, 40 insertions(+), 6 deletions(-) > > diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c > index d85be20d86..fb17c71d74 100644 > --- a/xen/arch/x86/cpuid.c > +++ b/xen/arch/x86/cpuid.c > @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu > *v, uint32_t leaf, > res->a |= XEN_HVM_CPUID_DOMID_PRESENT; > res->c = d->domain_id; > > + /* > + * Advertise extended destination ID support. This allows guests to > use > + * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for > + * additional destination ID bits, expanding the addressable APIC ID > + * range from 8 to 15 bits. This is required for x2APIC guests with > + * APIC IDs > 255. > + */ > + res->a |= XEN_HVM_CPUID_EXT_DEST_ID; > + > /* > * Per-vCPU event channel upcalls are implemented and work > * correctly with PIRQs routed over event channels. > diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c > index 5f64361113..2cc14d37d4 100644 > --- a/xen/arch/x86/hvm/irq.c > +++ b/xen/arch/x86/hvm/irq.c > @@ -374,7 +374,16 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 > isa_irq) > int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data) > { > uint32_t tmp = (uint32_t) addr; > - uint8_t dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; > + /* > + * Standard MSI destination: address bits 19:12 (8 bits). > + * Extended MSI destination: address bits 11:5 (7 more bits). > + * When XEN_HVM_CPUID_EXT_DEST_ID is advertised, the guest may use > + * bits 11:5 for high destination ID bits, expanding to 15 bits total. As we always advertise XEN_HVM_CPUID_EXT_DEST_ID, I would rather say > As XEN_HVM_CPUID_EXT_DEST_ID is advertised, ... > + * For legacy guests these bits are 0, so this is backwards-compatible. "Guests unaware of this feature set these bits to 0, ..." > + */ > + uint32_t dest = > + (((tmp & MSI_ADDR_EXT_DEST_ID_MASK) >> MSI_ADDR_EXT_DEST_ID_SHIFT) > << 8) | > + ((tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT); I wonder if we should introduce a macro like you did for IO-APIC (VIOAPIC_RTE_DEST). > uint8_t dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK); > uint8_t delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK) > >> MSI_DATA_DELIVERY_MODE_SHIFT; > diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c > index 7c725f9e47..263b1bd5cb 100644 > --- a/xen/arch/x86/hvm/vioapic.c > +++ b/xen/arch/x86/hvm/vioapic.c > @@ -411,7 +411,7 @@ static void ioapic_inj_irq( > > static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin) > { > - uint16_t dest = vioapic->redirtbl[pin].fields.dest_id; > + uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits); I would rather introduce a new field in vioapic_redir_entry for the extended dest part; and compute dest from that and dest_id. > uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode; > uint8_t delivery_mode = vioapic->redirtbl[pin].fields.delivery_mode; > uint8_t vector = vioapic->redirtbl[pin].fields.vector; > diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c > index 27b1f089e2..dca191b4f1 100644 > --- a/xen/arch/x86/hvm/vmsi.c > +++ b/xen/arch/x86/hvm/vmsi.c > @@ -66,7 +66,7 @@ static void vmsi_inj_irq( > > int vmsi_deliver( > struct domain *d, int vector, > - uint8_t dest, uint8_t dest_mode, > + uint32_t dest, uint8_t dest_mode, > uint8_t delivery_mode, uint8_t trig_mode) > { > struct vlapic *target; > @@ -125,7 +125,7 @@ void vmsi_deliver_pirq(struct domain *d, const struct > hvm_pirq_dpci *pirq_dpci) > } > > /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */ > -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t > dest_mode) > +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t > dest_mode) > { > int dest_vcpu_id = -1, w = 0; > struct vcpu *v; > diff --git a/xen/arch/x86/include/asm/hvm/hvm.h > b/xen/arch/x86/include/asm/hvm/hvm.h > index 7d9774df59..11256d5e67 100644 > --- a/xen/arch/x86/include/asm/hvm/hvm.h > +++ b/xen/arch/x86/include/asm/hvm/hvm.h > @@ -295,11 +295,11 @@ uint64_t hvm_get_guest_time_fixed(const struct vcpu *v, > uint64_t at_tsc); > > int vmsi_deliver( > struct domain *d, int vector, > - uint8_t dest, uint8_t dest_mode, > + uint32_t dest, uint8_t dest_mode, > uint8_t delivery_mode, uint8_t trig_mode); > struct hvm_pirq_dpci; > void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci > *pirq_dpci); > -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t > dest_mode); > +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t > dest_mode); > > enum hvm_intblk > hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack); > diff --git a/xen/arch/x86/include/asm/hvm/vioapic.h > b/xen/arch/x86/include/asm/hvm/vioapic.h > index 68af6dce79..b49eb348d5 100644 > --- a/xen/arch/x86/include/asm/hvm/vioapic.h > +++ b/xen/arch/x86/include/asm/hvm/vioapic.h > @@ -32,6 +32,19 @@ > #define VIOAPIC_EDGE_TRIG 0 > #define VIOAPIC_LEVEL_TRIG 1 > > +/* > + * Extract the destination ID from a 64-bit IO-APIC RTE, including the > + * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised. > + */ > +#define IO_APIC_REDIR_DEST_SHIFT 56 > +#define IO_APIC_REDIR_DEST_MASK 0xffULL > +#define IO_APIC_REDIR_EXT_DEST_SHIFT 49 > +#define IO_APIC_REDIR_EXT_DEST_MASK 0x7fULL > + > +#define VIOAPIC_RTE_DEST(rte) \ > + ((((rte) >> IO_APIC_REDIR_DEST_SHIFT) & IO_APIC_REDIR_DEST_MASK) | \ > + (((rte) >> IO_APIC_REDIR_EXT_DEST_SHIFT) & IO_APIC_REDIR_EXT_DEST_MASK) > << 8) > + > #define VIOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000U > #define VIOAPIC_MEM_LENGTH 0x100 > > diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h > index 00059d4a3a..b7a132e5b5 100644 > --- a/xen/arch/x86/include/asm/msi.h > +++ b/xen/arch/x86/include/asm/msi.h > @@ -54,6 +54,9 @@ > #define MSI_ADDR_DEST_ID_MASK 0x00ff000 > #define MSI_ADDR_DEST_ID(dest) (((dest) << > MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK) > > +#define MSI_ADDR_EXT_DEST_ID_SHIFT 5 > +#define MSI_ADDR_EXT_DEST_ID_MASK 0x0000fe0 > + > /* MAX fixed pages reserved for mapping MSIX tables. */ > #define FIX_MSIX_MAX_PAGES 512 > -- Teddy Astie | Vates XCP-ng Developer XCP-ng & Xen Orchestra - Vates solutions web: https://vates.tech
