Signed-off-by: Mykyta Poturai <[email protected]>
---
 xen/arch/arm/gic-v3-its.c              |  17 ++
 xen/arch/arm/gic-v3.c                  |   1 +
 xen/arch/arm/gic-v4-its.c              | 218 +++++++++++++++++++++++++-
 xen/arch/arm/include/asm/gic_v3_defs.h |  10 ++
 xen/arch/arm/include/asm/gic_v3_its.h  |   7 +
 xen/arch/arm/include/asm/gic_v4_its.h  |   6 +
 xen/arch/arm/include/asm/vgic.h        |   2 +
 7 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index fa5c1eb6d1..5979a82526 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -333,6 +333,23 @@ int its_send_cmd_discard(struct host_its *its, struct its_device *dev,
     return its_send_command(its, cmd);
 }
 
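+/*
+ * MOVI: redirect a mapped event (DeviceID/EventID) to a new collection,
+ * moving any pending state along with it.
+ */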
+int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t eventid,
+                      uint16_t icid)
+{
+    uint64_t cmd[4];
+
+    cmd[0] = GITS_CMD_MOVI | ((uint64_t)deviceid << 32);
+    cmd[1] = eventid;
+    cmd[2] = icid;
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
 /* Set up the (1:1) collection mapping for the given host CPU. */
 int gicv3_its_setup_collection(unsigned int cpu)
 {
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 1cb3169b72..fb80038f17 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -2096,6 +2096,7 @@ static void __init gicv4_init(void)
 
     gicv4_its_vpeid_allocator_init();
 
+    gicv4_init_vpe_proxy();
 }
 #else
 static void __init gicv4_init(void)
diff --git a/xen/arch/arm/gic-v4-its.c b/xen/arch/arm/gic-v4-its.c
index 0462976b93..83ee0510ac 100644
--- a/xen/arch/arm/gic-v4-its.c
+++ b/xen/arch/arm/gic-v4-its.c
@@ -39,6 +39,17 @@ static spinlock_t vpeid_alloc_lock = SPIN_LOCK_UNLOCKED;
 static uint16_t vmovp_seq_num;
 static spinlock_t vmovp_lock = SPIN_LOCK_UNLOCKED;
 
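+/*
+ * GICv4.0 doorbell proxy: a fake ITS device whose events stand in for vPE
+ * doorbell LPIs, so they can be invalidated/moved via INV/MOVI commands.
+ */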
+static struct {
+    spinlock_t lock;
+    struct its_device *dev;
+    struct its_vpe **vpes;
+    int next_victim;
+} vpe_proxy;
+
 void __init gicv4_its_vpeid_allocator_init(void)
 {
     /* Allocate space for vpeid_mask based on MAX_VPEID */
@@ -201,6 +212,128 @@ static int its_map_vpe(struct host_its *its, struct its_vpe *vpe)
 
     return 0;
 }
+
+static int gicv4_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
+{
+    int ret;
+
+    /* Already unmapped? */
+    if ( vpe->vpe_proxy_event == -1 )
+        return 0;
+
+    ret = its_send_cmd_discard(vpe_proxy.dev->hw_its, vpe_proxy.dev,
+                               vpe->vpe_proxy_event);
+    if ( ret )
+        return ret;
+    vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL;
+
+    /*
+     * We don't track empty slots at all, so let's move the
+     * next_victim pointer to quickly reuse the unmapped slot
+     */
+    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
+        vpe_proxy.next_victim = vpe->vpe_proxy_event;
+
+    vpe->vpe_proxy_event = -1;
+
+    return 0;
+}
+
+static void gicv4_vpe_db_proxy_unmap(struct its_vpe *vpe)
+{
+    if ( !gic_support_directLPI() )
+    {
+        unsigned long flags;
+
+        spin_lock_irqsave(&vpe_proxy.lock, flags);
+        gicv4_vpe_db_proxy_unmap_locked(vpe);
+        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+    }
+}
+
+/*
+ * If a GICv4.0 doesn't implement Direct LPIs (which is extremely
+ * likely), the only way to perform an invalidate is to use a fake
+ * device to issue an INV command, implying that the LPI has first
+ * been mapped to some event on that device. Since this is not exactly
+ * cheap, we try to keep that mapping around as long as possible, and
+ * only issue an UNMAP if we're short on available slots.
+ *
+ * GICv4.1 mandates that we're able to invalidate by writing to an
+ * MMIO register. And most of the time, we don't even have to invalidate
+ * the vPE doorbell, as the redistributor can be told whether to generate
+ * a doorbell or not.
+ */
+static int gicv4_vpe_db_proxy_map_locked(struct its_vpe *vpe)
+{
+    int ret;
+
+    /* Already mapped? */
+    if ( vpe->vpe_proxy_event != -1 )
+        return 0;
+
+    /* This slot was already allocated. Kick the other VPE out. */
+    if ( vpe_proxy.vpes[vpe_proxy.next_victim] )
+    {
+        struct its_vpe *old_vpe = vpe_proxy.vpes[vpe_proxy.next_victim];
+
+        ret = gicv4_vpe_db_proxy_unmap_locked(old_vpe);
+        if ( ret )
+            return ret;
+    }
+
+    /* Map the new VPE instead */
+    vpe_proxy.vpes[vpe_proxy.next_victim] = vpe;
+    vpe->vpe_proxy_event = vpe_proxy.next_victim;
+    vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) %
+                            vpe_proxy.dev->eventids;
+
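+    /* Bind the doorbell LPI to the proxy event so INV/MOVI can target it. */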
+    return its_send_cmd_mapti(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
+                              vpe->vpe_proxy_event, vpe->vpe_db_lpi,
+                              vpe->col_idx);
+}
+
+int __init gicv4_init_vpe_proxy(void)
+{
+    struct host_its *hw_its;
+    uint32_t devid;
+
+    if ( gic_support_directLPI() )
+    {
+        printk(XENLOG_INFO "ITS: Using DirectLPI for GICv4 VPE invalidation\n");
+        return 0;
+    }
+
+    /* Any ITS will do, even if not v4 */
+    hw_its = list_first_entry(&host_its_list, struct host_its, entry);
+
+    vpe_proxy.vpes = xzalloc_array(struct its_vpe *, nr_cpu_ids);
+    if ( !vpe_proxy.vpes )
+    {
+        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device array\n");
+        return -ENOMEM;
+    }
+
+    /* Use the last possible DevID */
+    devid = BIT(hw_its->devid_bits, UL) - 1;
+    vpe_proxy.dev = its_create_device(hw_its, devid, nr_cpu_ids);
+    if ( !vpe_proxy.dev )
+    {
+        printk(XENLOG_ERR "ITS: Can't allocate GICv4 VPE proxy device\n");
+        xfree(vpe_proxy.vpes);
+        vpe_proxy.vpes = NULL;
+        return -ENOMEM;
+    }
+
+    spin_lock_init(&vpe_proxy.lock);
+    vpe_proxy.next_victim = 0;
+    printk(XENLOG_INFO
+           "ITS: Allocated DevID %u as GICv4 VPE proxy device\n", devid);
+
+    return 0;
+}
+
 static int __init its_vpe_init(struct its_vpe *vpe)
 {
     int vpe_id, rc = -ENOMEM;
@@ -224,6 +357,7 @@ static int __init its_vpe_init(struct its_vpe *vpe)
     rwlock_init(&vpe->lock);
     vpe->vpe_id = vpe_id;
     vpe->vpendtable = page_to_virt(vpendtable);
+    vpe->vpe_proxy_event = -1;
     /*
      * We eagerly inform all the v4 ITS and map vPE to the first
      * possible CPU
@@ -299,16 +433,47 @@ static int its_send_cmd_vmovp(struct its_vpe *vpe)
     return 0;
 }
 
+/* GICR_SYNCR.Busy == 1 until the invalidation completes. */
+static void wait_for_syncr(void __iomem *rdbase)
+{
+    while ( readl_relaxed(rdbase + GICR_SYNCR) & 1 )
+        cpu_relax();
+}
+
+void direct_lpi_inv(struct its_device *dev, uint32_t eventid,
+                    uint32_t db_lpi, unsigned int cpu)
+{
+    void __iomem *rdbase;
+    uint64_t val;
+
+    /* Register-based LPI invalidation for DB on GICv4.0 */
+    val = FIELD_PREP(GICR_INVLPIR_INTID, db_lpi);
+
+    rdbase = per_cpu(rbase, cpu);
+    writeq_relaxed(val, rdbase + GICR_INVLPIR);
+    wait_for_syncr(rdbase);
+}
 
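+/* Invalidate any cached configuration of the vPE's doorbell LPI. */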
 static void its_vpe_send_inv_db(struct its_vpe *vpe)
 {
-    // struct its_device *dev = vpe_proxy.dev;
-    // unsigned long flags;
+    if ( gic_support_directLPI() )
+    {
+        unsigned int cpu = vpe->col_idx;
 
-    // spin_lock_irqsave(&vpe_proxy.lock, flags);
-    // gicv4_vpe_db_proxy_map_locked(vpe);
-    // its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);
-    // spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+        /* Target the redistributor this VPE is currently known on */
+        direct_lpi_inv(NULL, 0, vpe->vpe_db_lpi, cpu);
+    }
+    else
+    {
+        struct its_device *dev = vpe_proxy.dev;
+        unsigned long flags;
+
+        spin_lock_irqsave(&vpe_proxy.lock, flags);
+        gicv4_vpe_db_proxy_map_locked(vpe);
+        its_send_cmd_inv(dev->hw_its, dev->host_devid, vpe->vpe_proxy_event);
+        spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+    }
 }
 
 static void its_vpe_inv_db(struct its_vpe *vpe)
@@ -335,6 +500,7 @@ static void __init its_vpe_teardown(struct its_vpe *vpe)
     unsigned int order;
 
     order = get_order_from_bytes(max(lpi_data.max_host_lpi_ids / 8, (unsigned long)SZ_64K));
+    gicv4_vpe_db_proxy_unmap(vpe);
     its_free_vpeid(vpe->vpe_id);
     free_xenheap_pages(vpe->vpendtable, order);
     xfree(vpe);
@@ -830,6 +996,44 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long *flags)
     spin_unlock_irqrestore(&vpe->vpe_lock, *flags);
 }
 
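+/* Fix up the doorbell's pending state when a vPE moves between CPUs. */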
+static void gicv4_vpe_db_proxy_move(struct its_vpe *vpe, unsigned int from,
+                                    unsigned int to)
+{
+    unsigned long flags;
+
+    if ( gic_support_directLPI() )
+    {
+        void __iomem *rdbase;
+
+        rdbase = per_cpu(rbase, from);
+        /* Clear potential pending state on the old redistributor */
+        writeq_relaxed(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
+        wait_for_syncr(rdbase);
+        return;
+    }
+
+    spin_lock_irqsave(&vpe_proxy.lock, flags);
+
+    gicv4_vpe_db_proxy_map_locked(vpe);
+
+    /* MOVI instructs the appropriate Redistributor to move the pending state */
+    its_send_cmd_movi(vpe_proxy.dev->hw_its, vpe_proxy.dev->host_devid,
+                      vpe->vpe_proxy_event, to);
+
+    /*
+     * The ARM spec says that if, after using MOVI to move an interrupt
+     * from collection A to collection B, software moves it again from
+     * collection B to collection C, a SYNC for the Redistributor of
+     * collection A must be issued before the second MOVI to ensure
+     * correct behavior.
+     * So each time we issue a MOVI here, we SYNC the old collection
+     * for good measure.
+     */
+    WARN_ON(its_send_cmd_sync(vpe_proxy.dev->hw_its, from));
+
+    spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+}
+
 static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
 {
     struct its_vpe *vpe = vcpu->arch.vgic.its_vpe;
@@ -859,6 +1063,7 @@ static int gicv4_vpe_set_affinity(struct vcpu *vcpu)
     ret = its_send_cmd_vmovp(vpe);
     if ( ret )
         goto out;
+    gicv4_vpe_db_proxy_move(vpe, from, to);
 
  out:
     vpe_to_cpuid_unlock(vpe, &flags);
@@ -940,4 +1145,3 @@ int its_vlpi_prop_update(struct pending_irq *pirq, uint8_t property,
 
     return its_vlpi_set_doorbell(map, property & LPI_PROP_ENABLED);
 }
-
diff --git a/xen/arch/arm/include/asm/gic_v3_defs.h b/xen/arch/arm/include/asm/gic_v3_defs.h
index 0db75309cf..b4d50516ef 100644
--- a/xen/arch/arm/include/asm/gic_v3_defs.h
+++ b/xen/arch/arm/include/asm/gic_v3_defs.h
@@ -20,6 +20,16 @@
 
 #include <xen/sizes.h>
 
+#ifndef FIELD_GET
+#define FIELD_GET(_mask, _reg)                 \
+       ((typeof(_mask))(((_reg) & (_mask)) >> (ffs64(_mask) - 1)))
+#endif
+
+#ifndef FIELD_PREP
+#define FIELD_PREP(_mask, _val)                        \
+       (((typeof(_mask))(_val) << (ffs64(_mask) - 1)) & (_mask))
+#endif
+
 /*
  * Additional registers defined in GIC v3.
  * Common GICD registers are defined in gic.h
diff --git a/xen/arch/arm/include/asm/gic_v3_its.h b/xen/arch/arm/include/asm/gic_v3_its.h
index dababe97cd..0e82625840 100644
--- a/xen/arch/arm/include/asm/gic_v3_its.h
+++ b/xen/arch/arm/include/asm/gic_v3_its.h
@@ -236,6 +236,11 @@ int its_inv_lpi(struct host_its *its, struct its_device *dev,
                 uint32_t eventid, unsigned int cpu);
 int its_send_cmd_mapti(struct host_its *its, uint32_t deviceid,
                        uint32_t eventid, uint32_t pintid, uint16_t icid);
+struct its_device *its_create_device(struct host_its *hw_its,
+                                     uint32_t host_devid, uint64_t nr_events);
+int its_send_cmd_movi(struct host_its *its, uint32_t deviceid, uint32_t eventid,
+                      uint16_t icid);
+int its_send_cmd_sync(struct host_its *its, unsigned int cpu);
 #ifdef CONFIG_ACPI
 unsigned long gicv3_its_make_hwdom_madt(const struct domain *d,
                                         void *base_ptr);
@@ -326,6 +331,8 @@ void its_vpe_mask_db(struct its_vpe *vpe);
 int gicv4_its_vlpi_unmap(struct pending_irq *pirq);
 int its_vlpi_prop_update(struct pending_irq *pirq, uint8_t property,
                          bool needs_inv);
+void direct_lpi_inv(struct its_device *dev, uint32_t eventid,
+                    uint32_t db_lpi, unsigned int cpu);
 
 /* ITS quirks handling. */
 uint64_t gicv3_its_get_cacheability(void);
diff --git a/xen/arch/arm/include/asm/gic_v4_its.h b/xen/arch/arm/include/asm/gic_v4_its.h
index 37b6b92f0c..1d800fdbaf 100644
--- a/xen/arch/arm/include/asm/gic_v4_its.h
+++ b/xen/arch/arm/include/asm/gic_v4_its.h
@@ -52,6 +52,7 @@ struct event_vlpi_map {
 };
 
 void gicv4_its_vpeid_allocator_init(void);
+int gicv4_init_vpe_proxy(void);
 
 #define GICR_VPROPBASER                              0x0070
 #define GICR_VPENDBASER                              0x0078
@@ -97,6 +98,11 @@ static inline void gits_write_vpendbaser(uint64_t val, void __iomem *addr)
 }
 #define gits_read_vpendbaser(c)     readq_relaxed(c)
 
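+/* GICR_INVLPIR field layout; the VPEID/V fields reuse the GICR_INVALLR encoding. */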
+#define GICR_INVLPIR_INTID                GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID                GICR_INVALLR_VPEID
+#define GICR_INVLPIR_V                    GICR_INVALLR_V
+
 #endif
 
 /*
diff --git a/xen/arch/arm/include/asm/vgic.h b/xen/arch/arm/include/asm/vgic.h
index 9ef667decb..558f81818c 100644
--- a/xen/arch/arm/include/asm/vgic.h
+++ b/xen/arch/arm/include/asm/vgic.h
@@ -407,9 +407,11 @@ extern void vgic_check_inflight_irqs_pending(struct vcpu *v,
 
 /* GICV4 functions */
 #ifdef CONFIG_GICV4
+bool gic_support_directLPI(void);
 bool gic_support_vptValidDirty(void);
 bool gic_is_gicv4(void);
 #else
+#define gic_support_directLPI() (false)
 #define gic_support_vptValidDirty() (false)
 #define gic_is_gicv4() (false)
 #endif
-- 
2.51.2
