Implement doorbell invalidation and pending-state migration for GICv4
vPEs. When the redistributors support DirectLPI, doorbells are
invalidated by writing to GICR_INVLPIR; otherwise invalidations are
funnelled through a proxy ITS device whose event slots are kept mapped
for as long as possible and recycled round-robin. When a vPE changes
affinity, the doorbell's pending state is moved along with it, via MOVI
on the proxy device or GICR_CLRLPIR with DirectLPI.

Signed-off-by: Mykyta Poturai <[email protected]>
---
 xen/arch/arm/gic-v3-its.c              |  13 ++
 xen/arch/arm/gic-v3.c                  |   1 +
 xen/arch/arm/gic-v4-its.c              | 207 ++++++++++++++++++++++++-
 xen/arch/arm/include/asm/gic_v3_defs.h |   7 +
 xen/arch/arm/include/asm/gic_v3_its.h  |   7 +
 xen/arch/arm/include/asm/gic_v4_its.h  |   5 +
 xen/arch/arm/include/asm/vgic.h        |   2 +
 7 files changed, 235 insertions(+), 7 deletions(-)
diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index fa5c1eb6d1..5979a82526 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -333,6 +333,19 @@ int its_send_cmd_discard(struct host_its *its, struct its_device *dev,
     return its_send_command(its, cmd);
 }
 
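+/*
+ * MOVI moves the pending state of the LPI mapped at (DeviceID, EventID) to
+ * a new collection. Command layout: DW0 holds the command number with the
+ * DeviceID in bits [63:32], DW1 the EventID in bits [31:0], and DW2 the
+ * target ICID in bits [15:0].
+ */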
+int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t eventid,
+                      uint16_t icid)
+{
+    uint64_t cmd[4];
+
+    cmd[0] = GITS_CMD_MOVI | ((uint64_t)deviceid << 32);
+    cmd[1] = eventid;
+    cmd[2] = icid;
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
 /* Set up the (1:1) collection mapping for the given host CPU. */
 int gicv3_its_setup_collection(unsigned int cpu)
 {
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 1cb3169b72..fb80038f17 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -2096,6 +2096,7 @@ static void __init gicv4_init(void)
     gicv4_its_vpeid_allocator_init();
+    gicv4_init_vpe_proxy();
 }
 #else
 static void __init gicv4_init(void)
diff --git a/xen/arch/arm/gic-v4-its.c b/xen/arch/arm/gic-v4-its.c
index 0462976b93..83ee0510ac 100644
--- a/xen/arch/arm/gic-v4-its.c
+++ b/xen/arch/arm/gic-v4-its.c
@@ -39,6 +39,13 @@ static spinlock_t vpeid_alloc_lock = SPIN_LOCK_UNLOCKED;
 static uint16_t vmovp_seq_num;
 static spinlock_t vmovp_lock = SPIN_LOCK_UNLOCKED;
 
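+/*
+ * Fake ITS device used to issue INV/MOVI commands for vPE doorbells when
+ * the redistributors lack DirectLPI: it provides nr_cpu_ids event slots;
+ * mappings are kept around as long as possible, and next_victim selects
+ * the slot to evict when a free one is needed.
+ */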
+static struct {
+    spinlock_t lock;
+    struct its_device *dev;
+    struct its_vpe **vpes;
+    int next_victim;
+} vpe_proxy;
+
 void __init gicv4_its_vpeid_allocator_init(void)
 {
     /* Allocate space for vpeid_mask based on MAX_VPEID */
@@ -201,6 +208,124 @@ static int its_map_vpe(struct host_its *its, struct its_vpe *vpe)
     return 0;
 }
 
+static int gicv4_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
+{
+    int ret;
+
+    /* Already unmapped? */
+    if ( vpe->vpe_proxy_event == -1 )
+        return 0;
+
+    ret = its_send_cmd_discard(vpe_proxy.dev->hw_its, vpe_proxy.dev,
+                               vpe->vpe_proxy_event);
+    if ( ret )
+        return ret;
+    vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL;
+
+    /*
+     * We don't track empty slots at all, so let's move the
+     * next_victim pointer to quickly reuse the unmapped slot.
+     */
+    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
+        vpe_proxy.next_victim = vpe->vpe_proxy_event;
+
+    vpe->vpe_proxy_event = -1;
+
+    return 0;
+}
+
+static void gicv4_vpe_db_proxy_unmap(struct its_vpe *vpe)
+{
+    if ( !gic_support_directLPI() )
+    {
+        unsigned long flags;
+
+        spin_lock_irqsave(&vpe_proxy.lock, flags);
+        gicv4_vpe_db_proxy_unmap_locked(vpe);
+        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+    }
+}
+
+/*
+ * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
+ * likely), the only way to perform an invalidate is to use a fake
+ * device to issue an INV command, implying that the LPI has first
+ * been mapped to some event on that device. Since this is not exactly
+ * cheap, we try to keep that mapping around as long as possible, and
+ * only issue an UNMAP if we're short on available slots.
+ *
+ * GICv4.1 mandates that we're able to invalidate by writing to a
+ * MMIO register. And most of the time, we don't even have to invalidate
+ * the vPE doorbell, as the redistributor can be told whether to
+ * generate a doorbell or not.
+ */
+static int gicv4_vpe_db_proxy_map_locked(struct its_vpe *vpe)
+{
+    int ret;
+
+    /* Already mapped? */
+    if ( vpe->vpe_proxy_event != -1 )
+        return 0;
+
+    /* This slot was already allocated. Kick the other VPE out. */
+    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
+    {
+        struct its_vpe *old_vpe = vpe_proxy.vpes[vpe_proxy.next_victim];
+
+        ret = gicv4_vpe_db_proxy_unmap_locked(old_vpe);
+        if ( ret )
+            return ret;
+    }
+
+    /* Map the new VPE instead */
+    vpe_proxy.vpes[vpe_proxy.next_victim] = vpe;
+    vpe->vpe_proxy_event = vpe_proxy.next_victim;
+    vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) %
+                            vpe_proxy.dev->eventids;
+
+    return its_send_cmd_mapti(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
+                              vpe->vpe_proxy_event, vpe->vpe_db_lpi,
+                              vpe->col_idx);
+}
+
+int __init gicv4_init_vpe_proxy(void)
+{
+    struct host_its *hw_its;
+    uint32_t devid;
+
+    if ( gic_support_directLPI() )
+    {
+        printk("ITS: Using DirectLPI for GICv4 VPE invalidation\n");
+        return 0;
+    }
+
+    /* Any ITS will do, even if not v4 */
+    hw_its = list_first_entry(&host_its_list, struct host_its, entry);
+
+    vpe_proxy.vpes = xzalloc_array(struct its_vpe *, nr_cpu_ids);
+    if ( !vpe_proxy.vpes )
+    {
+        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device array\n");
+        return -ENOMEM;
+    }
+
+    /* Use the last possible DevID */
+    devid = BIT(hw_its->devid_bits, UL) - 1;
+    vpe_proxy.dev = its_create_device(hw_its, devid, nr_cpu_ids);
+    if ( !vpe_proxy.dev )
+    {
+        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device\n");
+        return -ENOMEM;
+    }
+
+    spin_lock_init(&vpe_proxy.lock);
+    vpe_proxy.next_victim = 0;
+    printk(XENLOG_INFO
+           "ITS: Allocated DevID %u as GICv4 VPE proxy device\n", devid);
+
+    return 0;
+}
+
 static int __init its_vpe_init(struct its_vpe *vpe)
 {
     int vpe_id, rc = -ENOMEM;
@@ -224,6 +349,7 @@ static int __init its_vpe_init(struct its_vpe *vpe)
     rwlock_init(&vpe->lock);
     vpe->vpe_id = vpe_id;
     vpe->vpendtable = page_to_virt(vpendtable);
+    vpe->vpe_proxy_event = -1;
     /*
      * We eagerly inform all the v4 ITS and map vPE to the first
      * possible CPU
@@ -299,16 +425,45 @@ static int its_send_cmd_vmovp(struct its_vpe *vpe)
     return 0;
 }
 
+/* GICR_SYNCR.Busy == 1 until the invalidation completes. */
+static void wait_for_syncr(void __iomem *rdbase)
+{
+    while ( readl_relaxed(rdbase + GICR_SYNCR) & 1 )
+        cpu_relax();
+}
+
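+/*
+ * Invalidate an LPI by writing its INTID to GICR_INVLPIR on the given
+ * CPU's redistributor, then wait on GICR_SYNCR for the invalidation to
+ * complete. dev and eventid are unused on this register-based path.
+ */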
+void direct_lpi_inv(struct its_device *dev, uint32_t eventid,
+                    uint32_t db_lpi, unsigned int cpu)
+{
+    void __iomem *rdbase;
+    uint64_t val;
+
+    /* Register-based LPI invalidation of the doorbell on GICv4.0 */
+    val = FIELD_PREP(GICR_INVLPIR_INTID, db_lpi);
+
+    rdbase = per_cpu(rbase, cpu);
+    writeq_relaxed(val, rdbase + GICR_INVLPIR);
+    wait_for_syncr(rdbase);
+}
 static void its_vpe_send_inv_db(struct its_vpe *vpe)
 {
-    // struct its_device *dev = vpe_proxy.dev;
-    // unsigned long flags;
-    // spin_lock_irqsave(&vpe_proxy.lock, flags);
-    // gicv4_vpe_db_proxy_map_locked(vpe);
-    // its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);
-    // spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+    if ( gic_support_directLPI() )
+    {
+        unsigned int cpu = vpe->col_idx;
+
+        /* Target the redistributor this VPE is currently known on */
+        direct_lpi_inv(NULL, 0, vpe->vpe_db_lpi, cpu);
+    }
+    else
+    {
+        struct its_device *dev = vpe_proxy.dev;
+        unsigned long flags;
+
+        spin_lock_irqsave(&vpe_proxy.lock, flags);
+        gicv4_vpe_db_proxy_map_locked(vpe);
+        its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);
+        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+    }
 }
 static void its_vpe_inv_db(struct its_vpe *vpe)
@@ -335,6 +490,7 @@ static void __init its_vpe_teardown(struct its_vpe *vpe)
     unsigned int order;
 
     order = get_order_from_bytes(max(lpi_data.max_host_lpi_ids / 8, (unsigned long)SZ_64K));
+    gicv4_vpe_db_proxy_unmap(vpe);
     its_free_vpeid(vpe->vpe_id);
     free_xenheap_pages(vpe->vpendtable, order);
     xfree(vpe);
@@ -830,6 +986,43 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long *flags)
     spin_unlock_irqrestore(&vpe->vpe_lock, *flags);
 }
 
+static void gicv4_vpe_db_proxy_move(struct its_vpe *vpe, unsigned int from,
+                                    unsigned int to)
+{
+    unsigned long flags;
+
+    if ( gic_support_directLPI() )
+    {
+        void __iomem *rdbase;
+
+        rdbase = per_cpu(rbase, from);
+        /* Clear potential pending state on the old redistributor */
+        writeq_relaxed(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
+        wait_for_syncr(rdbase);
+        return;
+    }
+
+    spin_lock_irqsave(&vpe_proxy.lock, flags);
+
+    gicv4_vpe_db_proxy_map_locked(vpe);
+
+    /* MOVI instructs the Redistributor to move the pending state */
+    its_send_cmd_movi(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
+                      vpe->vpe_proxy_event, to);
+
+    /*
+     * The ARM spec says that if, after using MOVI to move an interrupt from
+     * collection A to collection B, software moves the same interrupt again
+     * from collection B to collection C, a SYNC command must be issued for
+     * the Redistributor associated with collection A before the second MOVI
+     * to ensure correct behavior.
+     * So each time we issue a MOVI, we SYNC the old collection for good
+     * measure.
+     */
+    WARN_ON(its_send_cmd_sync(vpe_proxy.dev->hw_its, from));
+
+    spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+}
+
 static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
 {
     struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
@@ -859,6 +1052,7 @@ static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
     ret = its_send_cmd_vmovp(vpe);
     if ( ret )
         goto out;
+    gicv4_vpe_db_proxy_move(vpe, from, to);
 
 out:
     vpe_to_cpuid_unlock(vpe, &flags);
@@ -940,4 +1134,3 @@ int its_vlpi_prop_update(struct pending_irq *pirq, uint8_t property,
     return its_vlpi_set_doorbell(map, property & LPI_PROP_ENABLED);
 }
-
diff --git a/xen/arch/arm/include/asm/gic_v3_defs.h b/xen/arch/arm/include/asm/gic_v3_defs.h
index 0db75309cf..b4d50516ef 100644
--- a/xen/arch/arm/include/asm/gic_v3_defs.h
+++ b/xen/arch/arm/include/asm/gic_v3_defs.h
@@ -20,6 +20,13 @@
 
 #include <xen/sizes.h>
 
+#ifndef FIELD_GET
+#define FIELD_GET(_mask, _reg) \
+    ((typeof(_mask))(((_reg) & (_mask)) >> (ffs64(_mask) - 1)))
+#endif
+
+#define FIELD_PREP(_mask, _val) \
+    (((typeof(_mask))(_val) << (ffs64(_mask) - 1)) & (_mask))
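+
+/*
+ * Example: FIELD_PREP(GENMASK_ULL(47, 32), vpeid) places vpeid in
+ * bits [47:32] of a register value; FIELD_GET extracts it again.
+ */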
 /*
  * Additional registers defined in GIC v3.
  * Common GICD registers are defined in gic.h
diff --git a/xen/arch/arm/include/asm/gic_v3_its.h b/xen/arch/arm/include/asm/gic_v3_its.h
index dababe97cd..0e82625840 100644
--- a/xen/arch/arm/include/asm/gic_v3_its.h
+++ b/xen/arch/arm/include/asm/gic_v3_its.h
@@ -236,6 +236,11 @@ int its_inv_lpi(struct host_its *its, struct its_device *dev,
                 uint32_t eventid, unsigned int cpu);
 int its_send_cmd_mapti(struct host_its *its, uint32_t deviceid,
                        uint32_t eventid, uint32_t pintid, uint16_t icid);
+struct its_device *its_create_device(struct host_its *hw_its,
+                                     uint32_t host_devid, uint64_t nr_events);
+int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t eventid,
+                      uint16_t icid);
+int its_send_cmd_sync(struct host_its *its, unsigned int cpu);
 #ifdef CONFIG_ACPI
 unsigned long gicv3_its_make_hwdom_madt(const struct domain *d,
                                         void *base_ptr);
@@ -326,6 +331,8 @@ void its_vpe_mask_db(struct its_vpe *vpe);
 int gicv4_its_vlpi_unmap(struct pending_irq *pirq);
 int its_vlpi_prop_update(struct pending_irq *pirq, uint8_t property,
                          bool needs_inv);
+void direct_lpi_inv(struct its_device *dev, uint32_t eventid,
+                    uint32_t db_lpi, unsigned int cpu);
 
 /* ITS quirks handling. */
 uint64_t gicv3_its_get_cacheability(void);
diff --git a/xen/arch/arm/include/asm/gic_v4_its.h b/xen/arch/arm/include/asm/gic_v4_its.h
index 37b6b92f0c..1d800fdbaf 100644
--- a/xen/arch/arm/include/asm/gic_v4_its.h
+++ b/xen/arch/arm/include/asm/gic_v4_its.h
@@ -52,6 +52,7 @@ struct event_vlpi_map {
 };
 
 void gicv4_its_vpeid_allocator_init(void);
+int gicv4_init_vpe_proxy(void);
 
 #define GICR_VPROPBASER 0x0070
 #define GICR_VPENDBASER 0x0078
@@ -97,6 +98,10 @@ static inline void gits_write_vpendbaser(uint64_t val, void __iomem *addr)
 }
 
 #define gits_read_vpendbaser(c) readq_relaxed(c)
 
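+/*
+ * GICR_INVLPIR: pINTID lives in bits [31:0]; on GICv4.1, setting the V
+ * bit makes the write target a virtual LPI, with VPEID selecting the vPE.
+ */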
+#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID GICR_INVALLR_VPEID
+#define GICR_INVLPIR_V     GICR_INVALLR_V
+
 #endif
/*
diff --git a/xen/arch/arm/include/asm/vgic.h b/xen/arch/arm/include/asm/vgic.h
index 9ef667decb..558f81818c 100644
--- a/xen/arch/arm/include/asm/vgic.h
+++ b/xen/arch/arm/include/asm/vgic.h
@@ -407,9 +407,11 @@ extern void vgic_check_inflight_irqs_pending(struct vcpu *v,
 
 /* GICV4 functions */
 #ifdef CONFIG_GICV4
+bool gic_support_directLPI(void);
 bool gic_support_vptValidDirty(void);
 bool gic_is_gicv4(void);
 #else
+#define gic_support_directLPI() (false)
 #define gic_support_vptValidDirty() (false)
 #define gic_is_gicv4() (false)
 #endif
--
2.51.2