On Mon, Feb 2, 2026 at 6:14 PM Mykyta Poturai <[email protected]> wrote:
>
> When a VCPU is migrated to another PCPU, its VPE affinity must be
> updated. Hook into VPE scheduling to ensure that the VPE to be scheduled
> is located on the correct PCPU, if not, move it with VMOVP command.
>
> VMOVP needs to be issued on all ITSes in the system, and in the same
> order, unless a single-VMOVP-capable ITS is used.
>
> Signed-off-by: Mykyta Poturai <[email protected]>
> ---
>  xen/arch/arm/gic-v3-its.c             |  55 ++++++++++++++
>  xen/arch/arm/gic-v4-its.c             | 105 ++++++++++++++++++++++++++
>  xen/arch/arm/include/asm/gic_v3_its.h |  12 +++
>  3 files changed, 172 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index fb1d2709be..be840fbc8f 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -31,6 +31,8 @@
>  LIST_HEAD(host_its_list);
>
>
> +unsigned long its_list_map;
> +
>  unsigned int nvpeid = 16;
>
>  /*
> @@ -612,10 +614,47 @@ static int gicv3_disable_its(struct host_its *hw_its)
>      return -ETIMEDOUT;
>  }
>
> +static int __init its_compute_its_list_map(struct host_its *hw_its)
> +{
> +    int its_number;
> +    uint32_t ctlr;
> +
> +    its_number = find_first_zero_bit(&its_list_map, GICv4_ITS_LIST_MAX);
> +    if ( its_number >= GICv4_ITS_LIST_MAX )
> +    {
> +        printk(XENLOG_ERR
> +               "ITS@%lx: No ITSList entry available!\n", hw_its->addr);
> +        return -EINVAL;
> +    }
> +
> +    ctlr = readl_relaxed(hw_its->its_base + GITS_CTLR);
> +    ctlr &= ~GITS_CTLR_ITS_NUMBER;
> +    ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT;
> +    writel_relaxed(ctlr, hw_its->its_base + GITS_CTLR);
> +    ctlr = readl_relaxed(hw_its->its_base + GITS_CTLR);
> +    if ( (ctlr & GITS_CTLR_ITS_NUMBER) !=
> +         (its_number << GITS_CTLR_ITS_NUMBER_SHIFT) )
> +    {
> +        its_number = ctlr & GITS_CTLR_ITS_NUMBER;
> +        its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT;
> +    }
> +
> +    if ( test_and_set_bit(its_number, &its_list_map) )
> +    {
> +        printk(XENLOG_ERR
> +               "ITS@%lx: Duplicate ITSList entry %d\n",
> +               hw_its->addr, its_number);
> +        return -EINVAL;
> +    }
> +
> +    return its_number;
> +}
> +
>  static int gicv3_its_init_single_its(struct host_its *hw_its)
>  {
>      uint64_t reg;
>      int i, ret;
> +    int its_number;
>
>      hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
>      if ( !hw_its->its_base )
> @@ -633,6 +672,22 @@ static int gicv3_its_init_single_its(struct host_its *hw_its)
>      hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
>      if ( reg & GITS_TYPER_PTA )
>          hw_its->flags |= HOST_ITS_USES_PTA;
> +    hw_its->is_v4 = reg & GITS_TYPER_VLPIS;

hw_its->is_v4 = reg & GITS_TYPER_VLPIS only tells us that the ITS
supports VLPIs, not which GIC architecture revision it implements,
so the name is_v4 is misleading. Consider renaming it to has_vlpis
(or similar) and reading GITS_PIDR2 when the actual arch revision
is needed.
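
Untested sketch of what I mean (has_vlpis and arch_rev are suggested
field names, and GITS_PIDR2 would need a define; per the spec it
lives at offset 0xffe8 with ArchRev in bits [7:4]):

    uint32_t pidr2 = readl_relaxed(hw_its->its_base + GITS_PIDR2);

    hw_its->has_vlpis = reg & GITS_TYPER_VLPIS;
    /* GITS_PIDR2.ArchRev is 0x3 for GICv3 and 0x4 for GICv4. */
    hw_its->arch_rev = (pidr2 >> 4) & 0xf;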

> +    if ( hw_its->is_v4 )
> +    {
> +        if ( !(reg & GITS_TYPER_VMOVP) )
> +        {
> +            its_number = its_compute_its_list_map(hw_its);
> +            if ( its_number < 0 )
> +                return its_number;
> +            dprintk(XENLOG_INFO,
> +                    "ITS@%lx: Using ITS number %d\n",
> +                    hw_its->addr, its_number);
> +        }
> +        else
> +            dprintk(XENLOG_INFO,
> +                    "ITS@%lx: Single VMOVP capable\n", hw_its->addr);
> +    }
>      spin_lock_init(&hw_its->cmd_lock);
>
>      for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> diff --git a/xen/arch/arm/gic-v4-its.c b/xen/arch/arm/gic-v4-its.c
> index 6a550a65b2..175fda7acb 100644
> --- a/xen/arch/arm/gic-v4-its.c
> +++ b/xen/arch/arm/gic-v4-its.c
> @@ -36,6 +36,9 @@ static unsigned long *vpeid_mask;
>
>  static spinlock_t vpeid_alloc_lock = SPIN_LOCK_UNLOCKED;
>
> +static uint16_t vmovp_seq_num;
> +static spinlock_t vmovp_lock = SPIN_LOCK_UNLOCKED;
> +
>  void __init gicv4_its_vpeid_allocator_init(void)
>  {
>      /* Allocate space for vpeid_mask based on MAX_VPEID */
> @@ -242,6 +245,57 @@ static int __init its_vpe_init(struct its_vpe *vpe)
>      return rc;
>  }
>
> +static int its_send_cmd_vmovp(struct its_vpe *vpe)
> +{
> +    uint16_t vpeid = vpe->vpe_id;
> +    int ret;
> +    struct host_its *hw_its;
> +
> +    if ( !its_list_map )
> +    {
> +        uint64_t cmd[4];
> +
> +        hw_its = list_first_entry(&host_its_list, struct host_its, entry);
> +        cmd[0] = GITS_CMD_VMOVP;
> +        cmd[1] = (uint64_t)vpeid << 32;
> +        cmd[2] = encode_rdbase(hw_its, vpe->col_idx, 0x0);
> +        cmd[3] = 0x00;
> +
> +        return its_send_command(hw_its, cmd);

The spec says a VMOVP should be followed by a VSYNC to synchronize
the vPE context when moving a vPE across CommonLPIAff groups, but
here we issue the VMOVP and return without one. Should we add a
VSYNC (on both the single-ITS and the ITSList paths)?
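
Untested sketch (GITS_CMD_VSYNC would need a define; per my reading
of the spec the encoding is command 0x25 with the vPEID in bits
[47:32] of DW1, mirroring its_send_cmd_vmovp() here):

    static int its_send_cmd_vsync(struct host_its *hw_its, uint16_t vpeid)
    {
        uint64_t cmd[4];

        cmd[0] = GITS_CMD_VSYNC;
        cmd[1] = (uint64_t)vpeid << 32;
        cmd[2] = 0x00;
        cmd[3] = 0x00;

        return its_send_command(hw_its, cmd);
    }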

> +    }
> +
> +    /*
> +     * If using the its_list "feature", we need to make sure that all ITSs
> +     * receive all VMOVP commands in the same order. The only way
> +     * to guarantee this is to make vmovp a serialization point.
> +     */
> +    spin_lock(&vmovp_lock);
> +
> +    vmovp_seq_num++;
> +
> +    /* Emit VMOVPs */
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +    {
> +        uint64_t cmd[4];
> +
> +        cmd[0] = GITS_CMD_VMOVP | ((uint64_t)vmovp_seq_num << 32);
> +        cmd[1] = its_list_map | ((uint64_t)vpeid << 32);
> +        cmd[2] = encode_rdbase(hw_its, vpe->col_idx, 0x0);
> +        cmd[3] = 0x00;
> +
> +        ret = its_send_command(hw_its, cmd);

We iterate over all ITSes and send a VMOVP to each. Should this be
limited to the ITSes participating in the ITSList (and/or to the
v4 ITSes that can actually hold a vPE mapping)? Sending VMOVP to
unrelated ITSes looks unnecessary and may be incorrect.
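
E.g. (sketch, using the existing is_v4 flag, or has_vlpis if renamed):

    list_for_each_entry(hw_its, &host_its_list, entry)
    {
        /* Skip ITSes that cannot hold a vPE mapping. */
        if ( !hw_its->is_v4 )
            continue;

        /* ... build and send the VMOVP as before ... */
    }
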
---

If its_send_command() fails for one ITS in the loop, the vPE
affinity update becomes inconsistent across ITSes. Do we need a
rollback or recovery path (e.g. retry, VSYNC, or marking the vPE
unusable)?

> +        if ( ret )
> +        {
> +            spin_unlock(&vmovp_lock);
> +            return ret;
> +        }
> +    }
> +
> +    spin_unlock(&vmovp_lock);
> +
> +    return 0;
> +}
> +
>  static void __init its_vpe_teardown(struct its_vpe *vpe)
>  {
>      unsigned int order;
> @@ -687,6 +741,52 @@ static void its_make_vpe_non_resident(struct its_vpe *vpe, unsigned int cpu)
>      vpe->pending_last = val & GICR_VPENDBASER_PendingLast;
>  }
>
> +static int vpe_to_cpuid_lock(struct its_vpe *vpe, unsigned long *flags)
> +{
> +    spin_lock_irqsave(&vpe->vpe_lock, *flags);

vpe->vpe_lock is taken here but never initialized anywhere in this
patch.
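
Presumably in its_vpe_init(), i.e. something like:

    spin_lock_init(&vpe->vpe_lock);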

> +    return vpe->col_idx;
> +}
> +
> +static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long *flags)
> +{
> +    spin_unlock_irqrestore(&vpe->vpe_lock, *flags);
> +}
> +
> +static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
> +{
> +    struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
> +    unsigned int from, to = vcpu->processor;
> +    unsigned long flags;
> +    int ret = 0;
> +
> +    /*
> +     * Changing affinity is mega expensive, so let's be as lazy as
> +     * we can and only do it if we really have to. Also, if mapped
> +     * into the proxy device, we need to move the doorbell interrupt
> +     * to its new location.
> +     *
> +     * Another thing is that changing the affinity of a vPE affects
> +     * *other interrupts* such as all the vLPIs that are routed to
> +     * this vPE. This means that we must ensure nobody samples
> +     * vpe->col_idx during the update, hence the lock below which
> +     * must also be taken on any vLPI handling path that evaluates
> +     * vpe->col_idx, such as reg-based vLPI invalidation.
> +     */
> +    from = vpe_to_cpuid_lock(vpe, &flags);
> +    if ( from == to )
> +        goto out;
> +
> +    vpe->col_idx = to;

gicv4_vpe_set_affinity() updates vpe->col_idx before issuing the
VMOVP; on error the old value is not restored, so the software
state no longer matches the hardware.
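
A minimal fix could be to restore the old value on failure, e.g.:

    vpe->col_idx = to;

    ret = its_send_cmd_vmovp(vpe);
    if ( ret )
    {
        /* Keep the software state in sync with the hardware. */
        vpe->col_idx = from;
        goto out;
    }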

> +
> +    ret = its_send_cmd_vmovp(vpe);
> +    if ( ret )
> +        goto out;
> +
> + out:
> +    vpe_to_cpuid_unlock(vpe, &flags);
> +    return ret;
> +}
> +
>  void vgic_v4_load(struct vcpu *vcpu)
>  {
>      struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
> @@ -695,6 +795,11 @@ void vgic_v4_load(struct vcpu *vcpu)
>      if ( vpe->resident )
>          return;
>
> +    /*
> +     * Before making the VPE resident, make sure the redistributor
> +     * corresponding to our current CPU expects us here
> +     */
> +    WARN_ON(gicv4_vpe_set_affinity(vcpu));

If gicv4_vpe_set_affinity() fails, continuing to make the vPE
resident on the new CPU is very likely wrong: vLPI routing and
doorbells can then target the wrong redistributor, so the WARN_ON()
alone does not look sufficient.
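
Maybe refuse residency on failure instead, along these lines (sketch):

    if ( gicv4_vpe_set_affinity(vcpu) )
    {
        /*
         * Leaving the vPE non-resident seems safer than letting
         * vLPIs and doorbells target the wrong redistributor.
         */
        printk(XENLOG_ERR "%pv: failed to update vPE affinity\n", vcpu);
        return;
    }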


Best regards,
Mykola



>      its_make_vpe_resident(vpe, vcpu->processor);
>      vpe->resident = true;
>  }
> diff --git a/xen/arch/arm/include/asm/gic_v3_its.h b/xen/arch/arm/include/asm/gic_v3_its.h
> index 411beb81c8..f03a8fad47 100644
> --- a/xen/arch/arm/include/asm/gic_v3_its.h
> +++ b/xen/arch/arm/include/asm/gic_v3_its.h
> @@ -43,6 +43,9 @@
>  #define GITS_CTLR_QUIESCENT             BIT(31, UL)
>  #define GITS_CTLR_ENABLE                BIT(0, UL)
>
> +#define GITS_CTLR_ITS_NUMBER_SHIFT      4
> +#define GITS_CTLR_ITS_NUMBER            (0xfUL << GITS_CTLR_ITS_NUMBER_SHIFT)
> +
>  #define GITS_TYPER_PTA                  BIT(19, UL)
>  #define GITS_TYPER_DEVIDS_SHIFT         13
>  #define GITS_TYPER_DEVIDS_MASK          (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
> @@ -60,6 +63,8 @@
>                                                   GITS_TYPER_ITT_SIZE_SHIFT) + 1)
>  #define GITS_TYPER_PHYSICAL             (1U << 0)
>
> +#define GITS_TYPER_VLPIS                (1UL << 1)
> +#define GITS_TYPER_VMOVP                (1UL << 37)
>  #define GITS_BASER_INDIRECT             BIT(62, UL)
>  #define GITS_BASER_INNER_CACHEABILITY_SHIFT        59
>  #define GITS_BASER_TYPE_SHIFT           56
> @@ -118,6 +123,12 @@
>  /* We allocate LPIs on the hosts in chunks of 32 to reduce handling overhead. */
>  #define LPI_BLOCK                       32U
>
> +/*
> + * Maximum number of ITSs when GITS_TYPER.VMOVP == 0, using the
> + * ITSList mechanism to perform inter-ITS synchronization.
> + */
> +#define GICv4_ITS_LIST_MAX      16
> +
>  extern unsigned int nvpeid;
>  /* The maximum number of VPEID bits supported by VLPI commands */
>  #define ITS_MAX_VPEID_BITS      nvpeid
> @@ -214,6 +225,7 @@ struct __lpi_data {
>  extern struct __lpi_data lpi_data;
>
>  extern struct list_head host_its_list;
> +extern unsigned long its_list_map;
>
>  int its_send_cmd_discard(struct host_its *its, struct its_device *dev,
>                           uint32_t eventid);
> --
> 2.51.2
