Add MSI support to the ASPEED PCIe RC/Config model and introduce a per-RC
"IOMMU root" address space to correctly route MSI writes.

On AST2700 all RCs use the same MSI address, and the MSI target is PCI
system memory (not normal DRAM). If the MSI window were mapped into real
system RAM, an endpoint's write could be observed by other RCs and
spuriously trigger their interrupts. To avoid this, each RC now owns an
isolated IOMMU root AddressSpace that contains a small MSI window and a
DRAM alias region for normal DMA.

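The MSI window captures writes and asserts the RC IRQ. The written data
selects the MSI vector (0-63), which is latched as a pending bit in the
new H2X RC_H status registers R_H2X_RC_H_MSI_STS{0,1}; the matching
R_H2X_RC_H_MSI_EN{0,1} registers are added as well. Status bits are
write-1-to-clear, and the IRQ is dropped once an acknowledging write
leaves both status registers empty (a write of 0xffffffff is treated as
register initialization and does not touch the IRQ). The default MSI
address is 0x1e77005c and can be overridden via the msi-addr property.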

This keeps MSI traffic contained within each RC while preserving normal
DMA to system DRAM. It enables correct MSI/MSI-X interrupt delivery when
multiple RCs use the same MSI target address.

Signed-off-by: Jamin Lin <jamin_...@aspeedtech.com>
---
 include/hw/pci-host/aspeed_pcie.h |  10 +++
 hw/pci-host/aspeed_pcie.c         | 132 ++++++++++++++++++++++++++++++
 hw/pci-host/trace-events          |   3 +
 3 files changed, 145 insertions(+)

diff --git a/include/hw/pci-host/aspeed_pcie.h 
b/include/hw/pci-host/aspeed_pcie.h
index 8da9537207..6bc54659ee 100644
--- a/include/hw/pci-host/aspeed_pcie.h
+++ b/include/hw/pci-host/aspeed_pcie.h
@@ -36,6 +36,8 @@ typedef struct AspeedPCIECfgTxDesc {
 typedef struct AspeedPCIERcRegs {
     uint32_t int_en_reg;
     uint32_t int_sts_reg;
+    uint32_t msi_sts0_reg; /* regs[] index: MSI status, vectors 0-31 */
+    uint32_t msi_sts1_reg; /* regs[] index: MSI status, vectors 32-63 */
 } AspeedPCIERcRegs;
 
 typedef struct AspeedPCIERegMap {
@@ -55,11 +57,18 @@ OBJECT_DECLARE_SIMPLE_TYPE(AspeedPCIERcState, 
ASPEED_PCIE_RC);
 struct AspeedPCIERcState {
     PCIExpressHost parent_obj;
 
+    MemoryRegion iommu_root;
+    AddressSpace iommu_as;
+    MemoryRegion dram_alias;
+    MemoryRegion *dram_mr;
     MemoryRegion mmio_window;
+    MemoryRegion msi_window;
     MemoryRegion io_window;
     MemoryRegion mmio;
     MemoryRegion io;
 
+    uint64_t dram_base;
+    uint32_t msi_addr;
     uint32_t bus_nr;
     char name[16];
     qemu_irq irq;
@@ -87,6 +96,7 @@ struct AspeedPCIECfgClass {
     const AspeedPCIERegMap *reg_map;
     const MemoryRegionOps *reg_ops;
 
+    uint32_t rc_msi_addr;
     uint64_t rc_bus_nr;
     uint64_t nr_regs;
 };
diff --git a/hw/pci-host/aspeed_pcie.c b/hw/pci-host/aspeed_pcie.c
index b095375c7d..566feaebc7 100644
--- a/hw/pci-host/aspeed_pcie.c
+++ b/hw/pci-host/aspeed_pcie.c
@@ -65,6 +65,8 @@ static const TypeInfo aspeed_pcie_root_info = {
  * PCIe Root Complex (RC)
  */
 
+#define ASPEED_PCIE_CFG_RC_MAX_MSI 64 /* 2 x 32 status bits (MSI_STS0/1) */
+
 static void aspeed_pcie_rc_set_irq(void *opaque, int irq, int level)
 {
     AspeedPCIERcState *rc = (AspeedPCIERcState *) opaque;
@@ -94,6 +96,61 @@ static int aspeed_pcie_rc_map_irq(PCIDevice *pci_dev, int 
irq_num)
     return irq_num % PCI_NUM_PINS;
 }
 
+/* Latch MSI vector @data in MSI_STS{0,1} and raise the RC IRQ. */
+static void aspeed_pcie_rc_msi_notify(AspeedPCIERcState *rc, uint64_t data)
+{
+    AspeedPCIECfgState *cfg = container_of(rc, AspeedPCIECfgState, rc);
+    AspeedPCIECfgClass *apc = ASPEED_PCIE_CFG_GET_CLASS(cfg);
+    const AspeedPCIERcRegs *rc_regs = &apc->reg_map->rc;
+    uint32_t reg;
+
+    /* Data is the HW IRQ number; it is guest-controlled, so never assert. */
+    if (data >= ASPEED_PCIE_CFG_RC_MAX_MSI) {
+        return;
+    }
+
+    reg = (data < 32) ? rc_regs->msi_sts0_reg : rc_regs->msi_sts1_reg;
+    cfg->regs[reg] |= BIT(data % 32);
+
+    trace_aspeed_pcie_rc_msi_set_irq(cfg->id, data, 1);
+    qemu_set_irq(rc->irq, 1);
+}
+
+/* MSI window write hook: trace offset + msi_addr, then latch @data. */
+static void aspeed_pcie_rc_msi_write(void *opaque, hwaddr addr, uint64_t data,
+                                     unsigned int size)
+{
+    AspeedPCIERcState *host = ASPEED_PCIE_RC(opaque);
+    AspeedPCIECfgState *owner = container_of(host, AspeedPCIECfgState, rc);
+
+    trace_aspeed_pcie_rc_msi_notify(owner->id, addr + host->msi_addr, data);
+    aspeed_pcie_rc_msi_notify(host, data);
+}
+
+static uint64_t aspeed_pcie_rc_msi_read(void *opaque, hwaddr addr,
+                                        unsigned int size)
+{
+    return 0; /* Never leave .read NULL (crash on EP read); 0 is assumed. */
+}
+
+static const MemoryRegionOps aspeed_pcie_rc_msi_ops = {
+    .write = aspeed_pcie_rc_msi_write,
+    .read = aspeed_pcie_rc_msi_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = { .min_access_size = 4, .max_access_size = 4 },
+    .impl = { .min_access_size = 4, .max_access_size = 4 },
+};
+
+/* PCIIOMMUOps hook: hand back the RC-private AS regardless of bus/devfn. */
+static AddressSpace *aspeed_pcie_rc_get_as(PCIBus *bus, void *opaque, int devfn)
+{
+    return &ASPEED_PCIE_RC(opaque)->iommu_as;
+}
+
+static const PCIIOMMUOps aspeed_pcie_rc_iommu_ops = {
+    .get_address_space = aspeed_pcie_rc_get_as, /* same AS for every devfn */
+};
+
 static void aspeed_pcie_rc_realize(DeviceState *dev, Error **errp)
 {
     PCIExpressHost *pex = PCIE_HOST_BRIDGE(dev);
@@ -130,6 +187,42 @@ static void aspeed_pcie_rc_realize(DeviceState *dev, Error 
**errp)
                                      &rc->io, 0, 4, TYPE_PCIE_BUS);
     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
 
+   /*
+    * PCIe memory view setup
+    *
+    * Background:
+    * - On AST2700, all Root Complexes use the same MSI address. This MSI
+    *   address is not normal system RAM - it is a PCI system memory address.
+    *   If we map the MSI/MSI-X window into real system memory, a write from
+    *   one EP can be seen by all RCs and wrongly trigger interrupts on them.
+    *
+    * Design:
+    * - MSI/MSI-X here is just a placeholder address so RC and EP can talk.
+    *   We make a separate MMIO space (iommu_root) for the MSI window so the
+    *   writes stay local to each RC.
+    *
+    * DMA:
+    * - EPs still need access to real system memory for DMA. We add a DRAM
+    *   alias in the PCI space so DMA works as expected.
+    */
+    name = g_strdup_printf("pcie.%d.iommu_root", cfg->id);
+    memory_region_init(&rc->iommu_root, OBJECT(rc), name, UINT64_MAX);
+    address_space_init(&rc->iommu_as, &rc->iommu_root, name);
+    /* setup MSI */
+    memory_region_init_io(&rc->msi_window, OBJECT(rc),
+                          &aspeed_pcie_rc_msi_ops, rc,
+                          "msi_window", 4);
+    memory_region_add_subregion(&rc->iommu_root, rc->msi_addr,
+                                &rc->msi_window);
+    /* setup DRAM for DMA */
+    assert(rc->dram_mr != NULL);
+    name = g_strdup_printf("pcie.%d.dram_alias", cfg->id);
+    memory_region_init_alias(&rc->dram_alias, OBJECT(rc), name, rc->dram_mr,
+                             0, memory_region_size(rc->dram_mr));
+    memory_region_add_subregion(&rc->iommu_root, rc->dram_base,
+                                &rc->dram_alias);
+    pci_setup_iommu(pci->bus, &aspeed_pcie_rc_iommu_ops, rc);
+
     qdev_realize(DEVICE(&rc->root), BUS(pci->bus), &error_fatal);
 }
 
@@ -155,6 +248,10 @@ static void aspeed_pcie_rc_instance_init(Object *obj)
 
 static const Property aspeed_pcie_rc_props[] = {
     DEFINE_PROP_UINT32("bus-nr", AspeedPCIERcState, bus_nr, 0),
+    DEFINE_PROP_UINT32("msi-addr", AspeedPCIERcState, msi_addr, 0),
+    DEFINE_PROP_UINT64("dram-base", AspeedPCIERcState, dram_base, 0),
+    DEFINE_PROP_LINK("dram", AspeedPCIERcState, dram_mr, TYPE_MEMORY_REGION,
+                     MemoryRegion *), /* required: realize asserts non-NULL */
 };
 
 static void aspeed_pcie_rc_class_init(ObjectClass *klass, const void *data)
@@ -215,6 +312,10 @@ REG32(H2X_RC_H_INT_STS,     0xC8)
     SHARED_FIELD(H2X_RC_INT_INTDONE, 4, 1)
     SHARED_FIELD(H2X_RC_INT_INTX, 0, 4)
 REG32(H2X_RC_H_RDATA,       0xCC)
+REG32(H2X_RC_H_MSI_EN0,     0xE0)
+REG32(H2X_RC_H_MSI_EN1,     0xE4)
+REG32(H2X_RC_H_MSI_STS0,    0xE8) /* MSI 0-31 pending; write 1 to clear */
+REG32(H2X_RC_H_MSI_STS1,    0xEC) /* MSI 32-63 pending; write 1 to clear */
 
 #define TLP_FMTTYPE_CFGRD0  0x04 /* Configuration Read  Type 0 */
 #define TLP_FMTTYPE_CFGWR0  0x44 /* Configuration Write Type 0 */
@@ -228,6 +329,8 @@ static const AspeedPCIERegMap aspeed_regmap = {
     .rc = {
         .int_en_reg     = R_H2X_RC_H_INT_EN,
         .int_sts_reg    = R_H2X_RC_H_INT_STS,
+        .msi_sts0_reg   = R_H2X_RC_H_MSI_STS0,
+        .msi_sts1_reg   = R_H2X_RC_H_MSI_STS1,
     },
 };
 
@@ -399,6 +502,29 @@ static void aspeed_pcie_cfg_write(void *opaque, hwaddr 
addr, uint64_t data,
         }
         s->regs[reg] &= ~data | H2X_RC_INT_INTX_MASK;
         break;
+    /*
+     * Each bit of these status registers flags one pending MSI source.
+     * An ISR acknowledges its source by writing 1 to the matching bit,
+     * which clears it. Writing all ones is treated as (re)initialization
+     * of the register, not as ISR acknowledgement, so the IRQ is untouched.
+     */
+    case R_H2X_RC_H_MSI_STS0:
+    case R_H2X_RC_H_MSI_STS1:
+        if (data == 0) {
+            return ;
+        }
+
+        s->regs[reg] &= ~data;
+        if (data == 0xffffffff) {
+            return;
+        }
+
+        if (!s->regs[R_H2X_RC_H_MSI_STS0] &&
+            !s->regs[R_H2X_RC_H_MSI_STS1]) {
+            trace_aspeed_pcie_rc_msi_clear_irq(s->id, 0);
+            qemu_set_irq(s->rc.irq, 0);
+        }
+        break;
     default:
         s->regs[reg] = data;
         break;
@@ -420,6 +546,8 @@ static void aspeed_pcie_cfg_instance_init(Object *obj)
     AspeedPCIECfgState *s = ASPEED_PCIE_CFG(obj);
 
     object_initialize_child(obj, "rc", &s->rc, TYPE_ASPEED_PCIE_RC);
+    object_property_add_alias(obj, "dram", OBJECT(&s->rc), "dram");
+    object_property_add_alias(obj, "dram-base", OBJECT(&s->rc), "dram-base");
 
     return;
 }
@@ -448,6 +576,9 @@ static void aspeed_pcie_cfg_realize(DeviceState *dev, Error 
**errp)
     object_property_set_int(OBJECT(&s->rc), "bus-nr",
                             apc->rc_bus_nr,
                             &error_abort);
+    object_property_set_int(OBJECT(&s->rc), "msi-addr",
+                            apc->rc_msi_addr,
+                            &error_abort);
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->rc), errp)) {
         return;
     }
@@ -479,6 +610,7 @@ static void aspeed_pcie_cfg_class_init(ObjectClass *klass, 
const void *data)
     apc->reg_ops = &aspeed_pcie_cfg_ops;
     apc->reg_map = &aspeed_regmap;
     apc->nr_regs = 0x100 >> 2;
+    apc->rc_msi_addr = 0x1e77005C;
     apc->rc_bus_nr = 0x80;
 }
 
diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events
index 2584ea56e2..a6fd88c2c4 100644
--- a/hw/pci-host/trace-events
+++ b/hw/pci-host/trace-events
@@ -2,6 +2,9 @@
 
 # aspeed_pcie.c
 aspeed_pcie_rc_intx_set_irq(uint32_t id, int num, int level) "%d: num %d set 
IRQ leve %d"
+aspeed_pcie_rc_msi_notify(uint32_t id, uint64_t addr, uint64_t data) "%d: 0x%" 
PRIx64 " data 0x%" PRIx64
+aspeed_pcie_rc_msi_set_irq(uint32_t id, uint64_t num, int level) "%d: num 0x%" 
PRIx64 " set IRQ level %d"
+aspeed_pcie_rc_msi_clear_irq(uint32_t id, int level) "%d: clear IRQ level %d"
 aspeed_pcie_cfg_read(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 
0x%" PRIx64 " value 0x%" PRIx32
 aspeed_pcie_cfg_write(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 
0x%" PRIx64 " value 0x%" PRIx32
 aspeed_pcie_cfg_rw(uint32_t id, const char *dir, uint8_t bus, uint8_t devfn, 
uint64_t addr, uint64_t data) "%d: %s bus:0x%x devfn:0x%x addr 0x%" PRIx64 " 
data 0x%" PRIx64
-- 
2.43.0


Reply via email to