On Mon, Feb 2, 2026 at 6:14 PM Mykyta Poturai <[email protected]> wrote:
>
> When a VCPU is migrated to another PCPU, its VPE affinity must be
> updated. Hook into VPE scheduling to ensure that the VPE to be
> scheduled is located on the correct PCPU; if not, move it with a
> VMOVP command.
>
> VMOVP needs to be issued on all ITSes in the system, and in the same
> order, unless a single-VMOVP-capable ITS is used.
>
> Signed-off-by: Mykyta Poturai <[email protected]>
> ---
> xen/arch/arm/gic-v3-its.c             |  55 ++++++++++++++
> xen/arch/arm/gic-v4-its.c             | 105 ++++++++++++++++++++++++++
> xen/arch/arm/include/asm/gic_v3_its.h |  12 +++
> 3 files changed, 172 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index fb1d2709be..be840fbc8f 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -31,6 +31,8 @@
> LIST_HEAD(host_its_list);
>
>
> +unsigned long its_list_map;
> +
> unsigned int nvpeid = 16;
>
> /*
> @@ -612,10 +614,47 @@ static int gicv3_disable_its(struct host_its *hw_its)
> return -ETIMEDOUT;
> }
>
> +static int __init its_compute_its_list_map(struct host_its *hw_its)
> +{
> + int its_number;
> + uint32_t ctlr;
> +
> + its_number = find_first_zero_bit(&its_list_map, GICv4_ITS_LIST_MAX);
> + if ( its_number >= GICv4_ITS_LIST_MAX )
> + {
> + printk(XENLOG_ERR
> + "ITS@%lx: No ITSList entry available!\n", hw_its->addr);
> + return -EINVAL;
> + }
> +
> + ctlr = readl_relaxed(hw_its->its_base + GITS_CTLR);
> + ctlr &= ~GITS_CTLR_ITS_NUMBER;
> + ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT;
> + writel_relaxed(ctlr, hw_its->its_base + GITS_CTLR);
> + ctlr = readl_relaxed(hw_its->its_base + GITS_CTLR);
> + if ( (ctlr & GITS_CTLR_ITS_NUMBER) !=
> + (its_number << GITS_CTLR_ITS_NUMBER_SHIFT) )
> + {
> + its_number = ctlr & GITS_CTLR_ITS_NUMBER;
> + its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT;
> + }
> +
> + if ( test_and_set_bit(its_number, &its_list_map) )
> + {
> + printk(XENLOG_ERR
> + "ITS@%lx: Duplicate ITSList entry %d\n",
> + hw_its->addr, its_number);
> + return -EINVAL;
> + }
> +
> + return its_number;
> +}
> +
> static int gicv3_its_init_single_its(struct host_its *hw_its)
> {
> uint64_t reg;
> int i, ret;
> + int its_number;
>
> hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> if ( !hw_its->its_base )
> @@ -633,6 +672,22 @@ static int gicv3_its_init_single_its(struct host_its *hw_its)
> hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
> if ( reg & GITS_TYPER_PTA )
> hw_its->flags |= HOST_ITS_USES_PTA;
> + hw_its->is_v4 = reg & GITS_TYPER_VLPIS;
hw_its->is_v4 = reg & GITS_TYPER_VLPIS only tells us that VLPIs
are supported, not what the GIC architecture revision is. The name
is_v4 is misleading. Consider renaming it to has_vlpis (or similar)
and using GITS_PIDR2 when you need the GIC architecture revision.
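Something like the below, with made-up names (GITS_PIDR2 lives at
offset 0xffe8 from the ITS base and ArchRev is its bits [7:4], but
Xen does not define these yet, so treat this as a sketch only):

    #define GITS_PIDR2              0xffe8
    #define GITS_PIDR2_ARCH_REV(r)  (((r) >> 4) & 0xf)

    static bool its_is_gicv4(const struct host_its *hw_its)
    {
        uint32_t pidr2 = readl_relaxed(hw_its->its_base + GITS_PIDR2);

        /* ArchRev: 0x3 = GICv3, 0x4 = GICv4 */
        return GITS_PIDR2_ARCH_REV(pidr2) >= 0x4;
    }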
> + if ( hw_its->is_v4 )
> + {
> + if ( !(reg & GITS_TYPER_VMOVP) )
> + {
> + its_number = its_compute_its_list_map(hw_its);
> + if ( its_number < 0 )
> + return its_number;
> + dprintk(XENLOG_INFO,
> + "ITS@%lx: Using ITS number %d\n",
> + hw_its->addr, its_number);
> + }
> + else
> + dprintk(XENLOG_INFO,
> + "ITS@%lx: Single VMOVP capable\n", hw_its->addr);
> + }
> spin_lock_init(&hw_its->cmd_lock);
>
> for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> diff --git a/xen/arch/arm/gic-v4-its.c b/xen/arch/arm/gic-v4-its.c
> index 6a550a65b2..175fda7acb 100644
> --- a/xen/arch/arm/gic-v4-its.c
> +++ b/xen/arch/arm/gic-v4-its.c
> @@ -36,6 +36,9 @@ static unsigned long *vpeid_mask;
>
> static spinlock_t vpeid_alloc_lock = SPIN_LOCK_UNLOCKED;
>
> +static uint16_t vmovp_seq_num;
> +static spinlock_t vmovp_lock = SPIN_LOCK_UNLOCKED;
> +
> void __init gicv4_its_vpeid_allocator_init(void)
> {
> /* Allocate space for vpeid_mask based on MAX_VPEID */
> @@ -242,6 +245,57 @@ static int __init its_vpe_init(struct its_vpe *vpe)
> return rc;
> }
>
> +static int its_send_cmd_vmovp(struct its_vpe *vpe)
> +{
> + uint16_t vpeid = vpe->vpe_id;
> + int ret;
> + struct host_its *hw_its;
> +
> + if ( !its_list_map )
> + {
> + uint64_t cmd[4];
> +
> + hw_its = list_first_entry(&host_its_list, struct host_its, entry);
> + cmd[0] = GITS_CMD_VMOVP;
> + cmd[1] = (uint64_t)vpeid << 32;
> + cmd[2] = encode_rdbase(hw_its, vpe->col_idx, 0x0);
> + cmd[3] = 0x00;
> +
> + return its_send_command(hw_its, cmd);
The docs say VMOVP should be followed by VSYNC to synchronize the
context when moving a vPE across CommonLPIAff groups. Here we issue
the VMOVP and return without a VSYNC. Should we add one (for both
the single-ITS and ITSList paths)?
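For reference, a possible shape for the helper, assuming a
GITS_CMD_VSYNC define is added (0x25 per the GICv4 command encoding,
with the vPEID in bits [47:32] of the second doubleword):

    static int its_send_cmd_vsync(struct host_its *its, uint16_t vpeid)
    {
        uint64_t cmd[4];

        cmd[0] = GITS_CMD_VSYNC;
        cmd[1] = (uint64_t)vpeid << 32;
        cmd[2] = 0x00;
        cmd[3] = 0x00;

        return its_send_command(its, cmd);
    }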
> + }
> +
> + /*
> + * If using the its_list "feature", we need to make sure that all ITSs
> + * receive all VMOVP commands in the same order. The only way
> + * to guarantee this is to make vmovp a serialization point.
> + */
> + spin_lock(&vmovp_lock);
> +
> + vmovp_seq_num++;
> +
> + /* Emit VMOVPs */
> + list_for_each_entry(hw_its, &host_its_list, entry)
> + {
> + uint64_t cmd[4];
> +
> + cmd[0] = GITS_CMD_VMOVP | ((uint64_t)vmovp_seq_num << 32);
> + cmd[1] = its_list_map | ((uint64_t)vpeid << 32);
> + cmd[2] = encode_rdbase(hw_its, vpe->col_idx, 0x0);
> + cmd[3] = 0x00;
> +
> + ret = its_send_command(hw_its, cmd);
We iterate over all ITSes and send a VMOVP to each. Should this be
limited to the ITSes participating in the ITSList (and/or only the
v4 ITSes that can hold the vPE mapping)? Sending the command to
unrelated ITSes looks unnecessary and may be incorrect.
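For instance (using the has_vlpis name suggested above):

    list_for_each_entry(hw_its, &host_its_list, entry)
    {
        /* Skip ITSes that cannot hold a vPE mapping at all. */
        if ( !hw_its->has_vlpis )
            continue;
        /* ... existing VMOVP emission ... */
    }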
---
If its_send_command() fails for one ITS in the loop, the VPE
affinity update becomes inconsistent across ITSes. Do we need a
rollback or a recovery path (e.g. retry, VSYNC, or marking the vPE
unusable)?
> + if ( ret )
> + {
> + spin_unlock(&vmovp_lock);
> + return ret;
> + }
> + }
> +
> + spin_unlock(&vmovp_lock);
> +
> + return 0;
> +}
> +
> static void __init its_vpe_teardown(struct its_vpe *vpe)
> {
> unsigned int order;
> @@ -687,6 +741,52 @@ static void its_make_vpe_non_resident(struct its_vpe *vpe, unsigned int cpu)
> vpe->pending_last = val & GICR_VPENDBASER_PendingLast;
> }
>
> +static int vpe_to_cpuid_lock(struct its_vpe *vpe, unsigned long *flags)
> +{
> + spin_lock_irqsave(&vpe->vpe_lock, *flags);
vpe_lock is used but never initialized.
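Presumably a spin_lock_init() is missing, e.g. in its_vpe_init()
(exact placement is my guess):

    spin_lock_init(&vpe->vpe_lock);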
> + return vpe->col_idx;
> +}
> +
> +static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long *flags)
> +{
> + spin_unlock_irqrestore(&vpe->vpe_lock, *flags);
> +}
> +
> +static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
> +{
> + struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
> + unsigned int from, to = vcpu->processor;
> + unsigned long flags;
> + int ret = 0;
> +
> + /*
> + * Changing affinity is mega expensive, so let's be as lazy as
> + * we can and only do it if we really have to. Also, if mapped
> + * into the proxy device, we need to move the doorbell interrupt
> + * to its new location.
> + *
> + * Another thing is that changing the affinity of a vPE affects
> + * *other interrupts* such as all the vLPIs that are routed to
> + * this vPE. This means that we must ensure nobody samples
> + * vpe->col_idx during the update, hence the lock below which
> + * must also be taken on any vLPI handling path that evaluates
> + * vpe->col_idx, such as reg-based vLPI invalidation.
> + */
> + from = vpe_to_cpuid_lock(vpe, &flags);
> + if ( from == to )
> + goto out;
> +
> + vpe->col_idx = to;
This updates vpe->col_idx before issuing the VMOVP; if the command
fails, the old value is not restored, leaving col_idx pointing at a
CPU the ITSes were never told about.
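A minimal fix would be to restore the old value on failure, e.g.:

    vpe->col_idx = to;

    ret = its_send_cmd_vmovp(vpe);
    if ( ret )
    {
        /* The ITSes still target 'from', so undo the update. */
        vpe->col_idx = from;
        goto out;
    }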
> +
> + ret = its_send_cmd_vmovp(vpe);
> + if ( ret )
> + goto out;
> +
> + out:
> + vpe_to_cpuid_unlock(vpe, &flags);
> + return ret;
> +}
> +
> void vgic_v4_load(struct vcpu *vcpu)
> {
> struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
> @@ -695,6 +795,11 @@ void vgic_v4_load(struct vcpu *vcpu)
> if ( vpe->resident )
> return;
>
> + /*
> + * Before making the VPE resident, make sure the redistributor
> + * corresponding to our current CPU expects us here
> + */
> + WARN_ON(gicv4_vpe_set_affinity(vcpu));
If gicv4_vpe_set_affinity() fails, continuing to make the vPE resident
on the new CPU is very likely wrong: vLPI routing / doorbells can now
target the wrong redistributor.
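I would rather bail out instead of the WARN_ON(), e.g.:

    if ( gicv4_vpe_set_affinity(vcpu) )
    {
        printk(XENLOG_ERR "%pv: vPE affinity update failed\n", vcpu);
        return;
    }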
Best regards,
Mykola
> its_make_vpe_resident(vpe, vcpu->processor);
> vpe->resident = true;
> }
> diff --git a/xen/arch/arm/include/asm/gic_v3_its.h b/xen/arch/arm/include/asm/gic_v3_its.h
> index 411beb81c8..f03a8fad47 100644
> --- a/xen/arch/arm/include/asm/gic_v3_its.h
> +++ b/xen/arch/arm/include/asm/gic_v3_its.h
> @@ -43,6 +43,9 @@
> #define GITS_CTLR_QUIESCENT BIT(31, UL)
> #define GITS_CTLR_ENABLE BIT(0, UL)
>
> +#define GITS_CTLR_ITS_NUMBER_SHIFT 4
> +#define GITS_CTLR_ITS_NUMBER (0xfUL << GITS_CTLR_ITS_NUMBER_SHIFT)
> +
> #define GITS_TYPER_PTA BIT(19, UL)
> #define GITS_TYPER_DEVIDS_SHIFT 13
> #define GITS_TYPER_DEVIDS_MASK (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
> @@ -60,6 +63,8 @@
> GITS_TYPER_ITT_SIZE_SHIFT) + 1)
> #define GITS_TYPER_PHYSICAL (1U << 0)
>
> +#define GITS_TYPER_VLPIS (1UL << 1)
> +#define GITS_TYPER_VMOVP (1UL << 37)
> #define GITS_BASER_INDIRECT BIT(62, UL)
> #define GITS_BASER_INNER_CACHEABILITY_SHIFT 59
> #define GITS_BASER_TYPE_SHIFT 56
> @@ -118,6 +123,12 @@
> /* We allocate LPIs on the hosts in chunks of 32 to reduce handling overhead. */
> #define LPI_BLOCK 32U
>
> +/*
> + * Maximum number of ITSs when GITS_TYPER.VMOVP == 0, using the
> + * ITSList mechanism to perform inter-ITS synchronization.
> + */
> +#define GICv4_ITS_LIST_MAX 16
> +
> extern unsigned int nvpeid;
> /* The maximum number of VPEID bits supported by VLPI commands */
> #define ITS_MAX_VPEID_BITS nvpeid
> @@ -214,6 +225,7 @@ struct __lpi_data {
> extern struct __lpi_data lpi_data;
>
> extern struct list_head host_its_list;
> +extern unsigned long its_list_map;
>
> int its_send_cmd_discard(struct host_its *its, struct its_device *dev,
> uint32_t eventid);
> --
> 2.51.2