On Mon, Feb 2, 2026 at 6:14 PM Mykyta Poturai <[email protected]> wrote:
>
> Signed-off-by: Mykyta Poturai <[email protected]>
> ---
>  xen/arch/arm/gic-v3-its.c              |  13 ++
>  xen/arch/arm/gic-v3.c                  |   1 +
>  xen/arch/arm/gic-v4-its.c              | 207 ++++++++++++++++++++++++-
>  xen/arch/arm/include/asm/gic_v3_defs.h |   7 +
>  xen/arch/arm/include/asm/gic_v3_its.h  |   7 +
>  xen/arch/arm/include/asm/gic_v4_its.h  |   5 +
>  xen/arch/arm/include/asm/vgic.h        |   2 +
>  7 files changed, 235 insertions(+), 7 deletions(-)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index fa5c1eb6d1..5979a82526 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -333,6 +333,19 @@ int its_send_cmd_discard(struct host_its *its, struct 
> its_device *dev,
>      return its_send_command(its, cmd);
>  }
>
> +int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t 
> eventid,
> +                      uint16_t icid)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_MOVI | ((uint64_t)deviceid << 32);
> +    cmd[1] = eventid;
> +    cmd[2] = icid;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
>  /* Set up the (1:1) collection mapping for the given host CPU. */
>  int gicv3_its_setup_collection(unsigned int cpu)
>  {
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index 1cb3169b72..fb80038f17 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -2096,6 +2096,7 @@ static void __init gicv4_init(void)
>
>      gicv4_its_vpeid_allocator_init();
>
> +    gicv4_init_vpe_proxy();

This call ignores errors from gicv4_init_vpe_proxy(). If it returns an error
code, please handle it (propagate up, or at least log and bail out) to avoid
continuing with a partially initialized proxy.
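
For example, a rough sketch (panic() is just one way to bail out, given
gicv4_init() currently returns void):

    if ( gicv4_init_vpe_proxy() )
        panic("GICv4: failed to set up the vPE doorbell proxy\n");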

>  }
>  #else
>  static void __init gicv4_init(void)
> diff --git a/xen/arch/arm/gic-v4-its.c b/xen/arch/arm/gic-v4-its.c
> index 0462976b93..83ee0510ac 100644
> --- a/xen/arch/arm/gic-v4-its.c
> +++ b/xen/arch/arm/gic-v4-its.c
> @@ -39,6 +39,13 @@ static spinlock_t vpeid_alloc_lock = SPIN_LOCK_UNLOCKED;
>  static uint16_t vmovp_seq_num;
>  static spinlock_t vmovp_lock = SPIN_LOCK_UNLOCKED;
>
> +static struct {
> +    spinlock_t lock;
> +    struct its_device *dev;
> +    struct its_vpe **vpes;
> +    int next_victim;
> +} vpe_proxy;
> +
>  void __init gicv4_its_vpeid_allocator_init(void)
>  {
>      /* Allocate space for vpeid_mask based on MAX_VPEID */
> @@ -201,6 +208,124 @@ static int its_map_vpe(struct host_its *its, struct 
> its_vpe *vpe)
>
>      return 0;
>  }
> +static int gicv4_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
> +{
> +    int ret;
> +
> +    /* Already unmapped? */
> +    if ( vpe->vpe_proxy_event == -1 )
> +        return 0;
> +
> +    ret = its_send_cmd_discard(vpe_proxy.dev->hw_its, vpe_proxy.dev,
> +                               vpe->vpe_proxy_event);
> +    if ( ret )
> +        return ret;
> +    vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL;
> +
> +    /*
> +     * We don't track empty slots at all, so let's move the
> +     * next_victim pointer to quickly reuse the unmapped slot
> +     */
> +    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
> +        vpe_proxy.next_victim = vpe->vpe_proxy_event;
> +
> +    vpe->vpe_proxy_event = -1;
> +
> +    return 0;
> +}
> +
> +static void gicv4_vpe_db_proxy_unmap(struct its_vpe *vpe)
> +{
> +    if ( !gic_support_directLPI() )
> +    {
> +        unsigned long flags;
> +
> +        spin_lock_irqsave(&vpe_proxy.lock, flags);
> +        gicv4_vpe_db_proxy_unmap_locked(vpe);

The return value of gicv4_vpe_db_proxy_unmap_locked() is ignored here. If
the DISCARD fails, the vPE silently stays mapped to its proxy slot while
the caller carries on as if it were gone. At minimum, please check the
return code and log the failure.
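
A minimal sketch of what I have in mind (the warning text is only an
example):

        int ret;

        spin_lock_irqsave(&vpe_proxy.lock, flags);
        ret = gicv4_vpe_db_proxy_unmap_locked(vpe);
        if ( ret )
            printk(XENLOG_WARNING
                   "ITS: failed to unmap vPE %u from the doorbell proxy (%d)\n",
                   vpe->vpe_id, ret);
        spin_unlock_irqrestore(&vpe_proxy.lock, flags);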

> +        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
> +    }
> +}
> +
> +/*
> + * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
> + * likely), the only way to perform an invalidate is to use a fake
> + * device to issue an INV command, implying that the LPI has first
> + * been mapped to some event on that device. Since this is not exactly
> + * cheap, we try to keep that mapping around as long as possible, and
> + * only issue an UNMAP if we're short on available slots.
> + *
> + * GICv4.1 mandates that we're able to invalidate by writing to a
> + * MMIO register. And most of the time, we don't even have to invalidate
> + * vPE doorbell, as the redistributor can be told whether to generate a
> + * doorbell or not.
> + */
> +static int gicv4_vpe_db_proxy_map_locked(struct its_vpe *vpe)
> +{
> +    int ret;
> +
> +    /* Already mapped? */
> +    if ( vpe->vpe_proxy_event != -1 )
> +        return 0;
> +
> +    /* This slot was already allocated. Kick the other VPE out. */
> +    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
> +    {
> +        struct its_vpe *old_vpe = vpe_proxy.vpes[vpe_proxy.next_victim];
> +
> +        ret = gicv4_vpe_db_proxy_unmap_locked(old_vpe);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    /* Map the new VPE instead */
> +    vpe_proxy.vpes[vpe_proxy.next_victim] = vpe;
> +    vpe->vpe_proxy_event = vpe_proxy.next_victim;
> +    vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) %
> +                            vpe_proxy.dev->eventids;
> +
> +    return its_send_cmd_mapti(vpe_proxy.dev->hw_its, 
> vpe_proxy.dev->host_devid,

The software state is committed before MAPTI completes. If MAPTI
fails, the vpe stays "mapped" in SW (and next_victim advances), but HW
doesn't have the translation. I think this needs rollback on failure
(or update state only after MAPTI succeeds).
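
Roughly like this, i.e. only commit the software state once MAPTI has
succeeded (a sketch, using the names from the patch):

    ret = its_send_cmd_mapti(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
                             vpe_proxy.next_victim, vpe->vpe_db_lpi,
                             vpe->col_idx);
    if ( ret )
        return ret;

    vpe_proxy.vpes[vpe_proxy.next_victim] = vpe;
    vpe->vpe_proxy_event = vpe_proxy.next_victim;
    vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) %
                            vpe_proxy.dev->eventids;

    return 0;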

> +                              vpe->vpe_proxy_event, vpe->vpe_db_lpi,
> +                              vpe->col_idx);
> +}
> +
> +int __init gicv4_init_vpe_proxy(void)
> +{
> +    struct host_its *hw_its;
> +    uint32_t devid;
> +
> +    if ( gic_support_directLPI() )
> +    {
> +        printk("ITS: Using DirectLPI for GICv4 VPE invalidation\n");
> +        return 0;
> +    }
> +
> +    /* Any ITS will do, even if not v4 */
> +    hw_its = list_first_entry(&host_its_list, struct host_its, entry);

host_its_list may be empty here, and list_first_entry() is unsafe in that
case. Please use list_first_entry_or_null() (or check list_empty()) and
handle the NULL/empty case.
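
E.g. (sketch):

    hw_its = list_first_entry_or_null(&host_its_list, struct host_its, entry);
    if ( !hw_its )
    {
        printk(XENLOG_ERR "ITS: no host ITS available for GICv4 VPE proxy\n");
        return -ENODEV;
    }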

> +
> +    vpe_proxy.vpes = xzalloc_array(struct its_vpe *, nr_cpu_ids);
> +    if ( !vpe_proxy.vpes )
> +    {
> +        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device 
> array\n");
> +        return -ENOMEM;
> +    }
> +
> +    /* Use the last possible DevID */
> +    devid = BIT(hw_its->devid_bits, UL) - 1;

This "pick the last DevID" policy looks fragile: it can collide with
other Xen ITS users if they ever end up allocating across the full
DevID space.

> +    vpe_proxy.dev = its_create_device(hw_its, devid, nr_cpu_ids);
> +    if ( !vpe_proxy.dev )
> +    {
> +        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device\n");

vpe_proxy.vpes is leaked when the vpe_proxy.dev allocation fails; please
free it on this error path.
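
I.e. something like:

    if ( !vpe_proxy.dev )
    {
        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device\n");
        xfree(vpe_proxy.vpes);
        vpe_proxy.vpes = NULL;
        return -ENOMEM;
    }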

> +        return -ENOMEM;
> +    }
> +
> +    spin_lock_init(&vpe_proxy.lock);
> +    vpe_proxy.next_victim = 0;
> +    printk(XENLOG_INFO
> +           "ITS: Allocated DevID %u as GICv4 VPE proxy device\n", devid);
> +
> +    return 0;
> +}
> +
>  static int __init its_vpe_init(struct its_vpe *vpe)
>  {
>      int vpe_id, rc = -ENOMEM;
> @@ -224,6 +349,7 @@ static int __init its_vpe_init(struct its_vpe *vpe)
>      rwlock_init(&vpe->lock);
>      vpe->vpe_id = vpe_id;
>      vpe->vpendtable = page_to_virt(vpendtable);
> +        vpe->vpe_proxy_event = -1;
>      /*
>       * We eagerly inform all the v4 ITS and map vPE to the first
>       * possible CPU
> @@ -299,16 +425,45 @@ static int its_send_cmd_vmovp(struct its_vpe *vpe)
>      return 0;
>  }
>
> +/* GICR_SYNCR.Busy == 1 until the invalidation completes. */
> +static void wait_for_syncr(void __iomem *rdbase)
> +{
> +    while ( readl_relaxed(rdbase + GICR_SYNCR) & 1 )
> +        cpu_relax();

This is an unbounded busy-wait. If SYNCR.Busy gets stuck (or RDbase is
wrong), Xen can hang. It likely needs a timeout + error path (similar
to other GIC polling loops).
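
For example (a sketch; the bound is arbitrary and callers would then need
to check the result):

    static int wait_for_syncr(void __iomem *rdbase)
    {
        unsigned int count = 1000000;

        while ( readl_relaxed(rdbase + GICR_SYNCR) & 1 )
        {
            if ( !count-- )
                return -ETIMEDOUT;
            cpu_relax();
        }

        return 0;
    }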

> +}
> +
> +void direct_lpi_inv(struct its_device *dev, uint32_t eventid,

nit: dev and eventid are unused

> +                    uint32_t db_lpi, unsigned int cpu)
> +{
> +    void __iomem *rdbase;
> +    uint64_t val;
> +    /* Register-based LPI invalidation for DB on GICv4.0 */
> +    val = FIELD_PREP(GICR_INVLPIR_INTID, db_lpi);
> +
> +    rdbase = per_cpu(rbase, cpu);
> +    writeq_relaxed(val, rdbase + GICR_INVLPIR);

direct_lpi_inv() writes GICR_INVLPIR unconditionally.

Note that writing GICR_INVLPIR/GICR_INVALLR while GICR_SYNCR.Busy==1
is CONSTRAINED UNPREDICTABLE (the write may be ignored, or the
invalidate may be performed). So we need to ensure Busy==0 before
issuing a new invalidate, not only wait after the write.

Please add a pre-check loop (or serialize callers) to guarantee
GICR_SYNCR.Busy==0 before the INVLPIR write.

Also note that the proxy lock only protects the ITS proxy path; the direct
INVLPIR path must likewise avoid writing while GICR_SYNCR.Busy == 1
(either via a pre-check or a separate lock).
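
I.e. something along these lines (sketch):

    rdbase = per_cpu(rbase, cpu);

    /* Don't write INVLPIR while a previous invalidation is still in flight. */
    wait_for_syncr(rdbase);
    writeq_relaxed(val, rdbase + GICR_INVLPIR);
    wait_for_syncr(rdbase);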

> +    wait_for_syncr(rdbase);
> +}
>
>  static void its_vpe_send_inv_db(struct its_vpe *vpe)
>  {
> -    // struct its_device *dev = vpe_proxy.dev;
> -    // unsigned long flags;
> +    if ( gic_support_directLPI() )
> +    {
> +        unsigned int cpu = vpe->col_idx;

We read vpe->col_idx without the vpe lock. Should we use
vpe_to_cpuid_lock() (or otherwise hold vpe_lock) to avoid a
race with affinity updates?
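
Something like the below, assuming the lock side mirrors the
vpe_to_cpuid_unlock(vpe, &flags) shown later in this patch (just a sketch):

        unsigned long flags;
        unsigned int cpu;

        vpe_to_cpuid_lock(vpe, &flags);
        cpu = vpe->col_idx;
        direct_lpi_inv(NULL, 0, vpe->vpe_db_lpi, cpu);
        vpe_to_cpuid_unlock(vpe, &flags);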

>
> -    // spin_lock_irqsave(&vpe_proxy.lock, flags);
> -    // gicv4_vpe_db_proxy_map_locked(vpe);
> -    // its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);
> -    // spin_unlock_irqrestore(&vpe_proxy.lock, flags);
> +        /* Target the redistributor this VPE is currently known on */
> +        direct_lpi_inv(NULL, 0, vpe->vpe_db_lpi, cpu);

I think we should document the ordering constraint at the point where
we select between direct_lpi_inv() and the ITS/proxy invalidation path
(its_vpe_send_inv_db()).

The architecture requires register-based invalidation (GICR_INVLPIR /
GICR_INVALLR + polling GICR_SYNCR.Busy) to be serialized against any
ITS command that affects the same interrupt. Overlapping these
operations is CONSTRAINED UNPREDICTABLE.
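
Maybe a comment along these lines at the top of its_vpe_send_inv_db()
(wording is only a suggestion):

    /*
     * Register-based invalidation (GICR_INVLPIR/GICR_INVALLR plus polling
     * GICR_SYNCR.Busy) must be serialized against ITS commands affecting
     * the same interrupt; overlapping the two is CONSTRAINED UNPREDICTABLE.
     */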

> +    }
> +    else
> +    {
> +        struct its_device *dev = vpe_proxy.dev;
> +        unsigned long flags;
> +
> +        spin_lock_irqsave(&vpe_proxy.lock, flags);
> +        gicv4_vpe_db_proxy_map_locked(vpe);
> +        its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);

Both return values are ignored. If map_locked() fails and leaves
vpe_proxy_event at the sentinel value, INV may use an invalid eventid.
At minimum: check map_locked() and INV return codes and avoid issuing
INV on failure.

Errors need to be reviewed and handled at the other call sites too.
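
For instance (a sketch; since its_vpe_send_inv_db() returns void, logging
may be the minimum here):

        int ret;

        spin_lock_irqsave(&vpe_proxy.lock, flags);
        ret = gicv4_vpe_db_proxy_map_locked(vpe);
        if ( !ret )
            ret = its_send_cmd_inv(dev->hw_its, dev->host_devid,
                                   vpe->vpe_proxy_event);
        if ( ret )
            printk(XENLOG_WARNING
                   "ITS: doorbell INV for vPE %u failed (%d)\n",
                   vpe->vpe_id, ret);
        spin_unlock_irqrestore(&vpe_proxy.lock, flags);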

> +        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
> +    }
>  }
>
>  static void its_vpe_inv_db(struct its_vpe *vpe)
> @@ -335,6 +490,7 @@ static void __init its_vpe_teardown(struct its_vpe *vpe)
>      unsigned int order;
>
>      order = get_order_from_bytes(max(lpi_data.max_host_lpi_ids / 8, 
> (unsigned long)SZ_64K));
> +    gicv4_vpe_db_proxy_unmap(vpe);
>      its_free_vpeid(vpe->vpe_id);
>      free_xenheap_pages(vpe->vpendtable, order);
>      xfree(vpe);
> @@ -830,6 +986,43 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, 
> unsigned long *flags)
>      spin_unlock_irqrestore(&vpe->vpe_lock, *flags);
>  }
>
> +static void gicv4_vpe_db_proxy_move(struct its_vpe *vpe, unsigned int from,
> +                                    unsigned int to)
> +{
> +    unsigned long flags;
> +
> +    if ( gic_support_directLPI() )
> +    {
> +        void __iomem *rdbase;
> +
> +        rdbase = per_cpu(rbase, from);
> +        /* Clear potential pending state on the old redistributor */
> +        writeq_relaxed(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
> +        wait_for_syncr(rdbase);
> +        return;
> +    }
> +
> +    spin_lock_irqsave(&vpe_proxy.lock, flags);
> +
> +    gicv4_vpe_db_proxy_map_locked(vpe);
> +
> +    /* MOVI instructs the appropriate Redistributor to move the pending 
> state */
> +    its_send_cmd_movi(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
> +                      vpe->vpe_proxy_event, to);

Please handle the return value of its_send_cmd_movi().
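
E.g. (sketch, with an int ret added):

    ret = its_send_cmd_movi(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
                            vpe->vpe_proxy_event, to);
    if ( ret )
        printk(XENLOG_WARNING
               "ITS: MOVI for vPE %u doorbell failed (%d)\n",
               vpe->vpe_id, ret);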


> +
> +    /*
> +     * ARM spec says that If, after using MOVI to move an interrupt from
> +     * collection A to collection B, software moves the same interrupt again
> +     * from collection B to collection C, a SYNC command must be used before
> +     * the second MOVI for the Redistributor associated with collection A to
> +     * ensure correct behavior.
> +     * So each time we issue VMOVI, we VSYNC the old VPE for good measure.
> +     */
> +    WARN_ON(its_send_cmd_sync(vpe_proxy.dev->hw_its, from));
> +
> +    spin_unlock_irqrestore(&vpe_proxy.lock, flags);
> +}
> +
>  static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
>  {
>      struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
> @@ -859,6 +1052,7 @@ static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
>      ret = its_send_cmd_vmovp(vpe);
>      if ( ret )
>          goto out;
> +    gicv4_vpe_db_proxy_move(vpe, from, to);
>
>   out:
>      vpe_to_cpuid_unlock(vpe, &flags);
> @@ -940,4 +1134,3 @@ int its_vlpi_prop_update(struct pending_irq *pirq, 
> uint8_t property,
>
>      return its_vlpi_set_doorbell(map, property & LPI_PROP_ENABLED);
>  }
> -
> diff --git a/xen/arch/arm/include/asm/gic_v3_defs.h 
> b/xen/arch/arm/include/asm/gic_v3_defs.h
> index 0db75309cf..b4d50516ef 100644
> --- a/xen/arch/arm/include/asm/gic_v3_defs.h
> +++ b/xen/arch/arm/include/asm/gic_v3_defs.h
> @@ -20,6 +20,13 @@
>
>  #include <xen/sizes.h>
>
> +#ifndef FIELD_GET
> +#define FIELD_GET(_mask, _reg)                 \
> +       ((typeof(_mask))(((_reg) & (_mask)) >> (ffs64(_mask) - 1)))
> +#endif
> +
> +#define FIELD_PREP(_mask, _val)                        \
> +       (((typeof(_mask))(_val) << (ffs64(_mask) - 1)) & (_mask))

Please guard this macro with #ifndef as well; otherwise it can clash with
an existing definition.
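
I.e.:

    #ifndef FIELD_PREP
    #define FIELD_PREP(_mask, _val)                        \
           (((typeof(_mask))(_val) << (ffs64(_mask) - 1)) & (_mask))
    #endif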

>  /*
>   * Additional registers defined in GIC v3.
>   * Common GICD registers are defined in gic.h
> diff --git a/xen/arch/arm/include/asm/gic_v3_its.h 
> b/xen/arch/arm/include/asm/gic_v3_its.h
> index dababe97cd..0e82625840 100644
> --- a/xen/arch/arm/include/asm/gic_v3_its.h
> +++ b/xen/arch/arm/include/asm/gic_v3_its.h
> @@ -236,6 +236,11 @@ int its_inv_lpi(struct host_its *its, struct its_device 
> *dev,
>                  uint32_t eventid, unsigned int cpu);
>  int its_send_cmd_mapti(struct host_its *its, uint32_t deviceid,
>                         uint32_t eventid, uint32_t pintid, uint16_t icid);
> +struct its_device *its_create_device(struct host_its *hw_its,
> +                                     uint32_t host_devid, uint64_t 
> nr_events);
> +int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t 
> eventid,
> +                      uint16_t icid);
> +int its_send_cmd_sync(struct host_its *its, unsigned int cpu);
>  #ifdef CONFIG_ACPI
>  unsigned long gicv3_its_make_hwdom_madt(const struct domain *d,
>                                          void *base_ptr);
> @@ -326,6 +331,8 @@ void its_vpe_mask_db(struct its_vpe *vpe);
>  int gicv4_its_vlpi_unmap(struct pending_irq *pirq);
>  int its_vlpi_prop_update(struct pending_irq *pirq, uint8_t property,
>                           bool needs_inv);
> +void direct_lpi_inv(struct its_device *dev, uint32_t eventid,
> +                    uint32_t db_lpi, unsigned int cpu);
>
>  /* ITS quirks handling. */
>  uint64_t gicv3_its_get_cacheability(void);
> diff --git a/xen/arch/arm/include/asm/gic_v4_its.h 
> b/xen/arch/arm/include/asm/gic_v4_its.h
> index 37b6b92f0c..1d800fdbaf 100644
> --- a/xen/arch/arm/include/asm/gic_v4_its.h
> +++ b/xen/arch/arm/include/asm/gic_v4_its.h
> @@ -52,6 +52,7 @@ struct event_vlpi_map {
>  };
>
>  void gicv4_its_vpeid_allocator_init(void);
> +int gicv4_init_vpe_proxy(void);
>
>  #define GICR_VPROPBASER                              0x0070
>  #define GICR_VPENDBASER                              0x0078
> @@ -97,6 +98,10 @@ static inline void gits_write_vpendbaser(uint64_t val, 
> void __iomem *addr)
>  }
>  #define gits_read_vpendbaser(c)     readq_relaxed(c)
>
> +#define GICR_INVLPIR_INTID                GENMASK_ULL(31, 0)
> +#define GICR_INVLPIR_VPEID                GICR_INVALLR_VPEID
> +#define GICR_INVLPIR_V                    GICR_INVALLR_V

GICR_INVALLR_VPEID and GICR_INVALLR_V are not defined, so this does
not build as-is.

Also note that the V/VPEID fields in GICR_INVLPIR/INVALLR exist only
for GICv4.1 (FEAT_GICv4p1). For GICv4.0 these bits are RES0.
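
If you keep the GICv4.1 layout, the missing definitions would presumably
look something like this (field positions as per the GICv4.1 spec; please
double-check):

    #define GICR_INVALLR_VPEID                GENMASK_ULL(47, 32)
    #define GICR_INVALLR_V                    GENMASK_ULL(63, 63)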


Best regards,
Mykola

> +
>  #endif
>
>  /*
> diff --git a/xen/arch/arm/include/asm/vgic.h b/xen/arch/arm/include/asm/vgic.h
> index 9ef667decb..558f81818c 100644
> --- a/xen/arch/arm/include/asm/vgic.h
> +++ b/xen/arch/arm/include/asm/vgic.h
> @@ -407,9 +407,11 @@ extern void vgic_check_inflight_irqs_pending(struct vcpu 
> *v,
>
>  /* GICV4 functions */
>  #ifdef CONFIG_GICV4
> +bool gic_support_directLPI(void);
>  bool gic_support_vptValidDirty(void);
>  bool gic_is_gicv4(void);
>  #else
> +#define gic_support_directLPI() (false)
>  #define gic_support_vptValidDirty() (false)
>  #define gic_is_gicv4() (false)
>  #endif
> --
> 2.51.2
