Michael, could you please review the patch as it is about PCI? Thanks!
On 07/12/2013 05:38 PM, Alexey Kardashevskiy wrote: > On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS > hypercalls which return global IRQ numbers to a guest so it only > operates with those and never touches MSIMessage. > > Therefore MSIMessage handling is completely hidden in QEMU. > > Previously every sPAPR PCI host bridge implemented its own MSI window > to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci > or vfio) and route them to the guest via qemu_pulse_irq(). > MSIMessage used to be encoded as: > .addr - address within the PHB MSI window; > .data - the device index on PHB plus vector number. > The MSI MR write function translated this MSIMessage to a global IRQ > number and called qemu_pulse_irq(). > > However the total number of IRQs is not really big (at the moment it is > 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage > seems to be enough to store an IRQ number there. > > This simplifies MSI handling in sPAPR PHB. Specifically, this does: > 1. remove a MSI window from a PHB; > 2. add a single memory region for all MSIs to sPAPREnvironment > and spapr_pci_msi_init() to initialize it; > 3. encode MSIMessage as: > * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL; > * .data as an IRQ number. > 4. change IRQ allocator to align first IRQ number in a block for MSI. > MSI uses lower bits to specify the vector number so the first IRQ has to > be aligned. MSIX does not need any special allocator though. > > Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > --- > hw/ppc/spapr.c | 29 ++++++++++++++++++--- > hw/ppc/spapr_pci.c | 61 > ++++++++++++++++++++------------------------- > include/hw/pci-host/spapr.h | 8 +++--- > include/hw/ppc/spapr.h | 4 ++- > 4 files changed, 60 insertions(+), 42 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 671bbd9..6b21191 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi) > > if (hint) { > irq = hint; > + if (hint >= spapr->next_irq) { > + spapr->next_irq = hint + 1; > + } > /* FIXME: we should probably check for collisions somehow */ > } else { > irq = spapr->next_irq++; > @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi) > return irq; > } > > -/* Allocate block of consequtive IRQs, returns a number of the first */ > -int spapr_allocate_irq_block(int num, bool lsi) > +/* > + * Allocate block of consequtive IRQs, returns a number of the first. > + * If msi==true, aligns the first IRQ number to num. > + */ > +int spapr_allocate_irq_block(int num, bool lsi, bool msi) > { > int first = -1; > - int i; > + int i, hint = 0; > + > + /* > + * MSIMesage::data is used for storing VIRQ so > + * it has to be aligned to num to support multiple > + * MSI vectors. MSI-X is not affected by this. > + * The hint is used for the first IRQ, the rest should > + * be allocated continously. > + */ > + if (msi) { > + assert((num == 1) || (num == 2) || (num == 4) || > + (num == 8) || (num == 16) || (num == 32)); > + hint = (spapr->next_irq + num - 1) & ~(num - 1); > + } > > for (i = 0; i < num; ++i) { > int irq; > > - irq = spapr_allocate_irq(0, lsi); > + irq = spapr_allocate_irq(hint, lsi); > if (!irq) { > return -1; > } > > if (0 == i) { > first = irq; > + hint = 0; > } > > /* If the above doesn't create a consecutive block then that's > @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) > spapr_create_nvram(spapr); > > /* Set up PCI */ > + spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW); > spapr_pci_rtas_init(); > > phb = spapr_create_phb(spapr, 0); > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index dfe4d04..c8ea777 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb, > uint32_t config_addr, > * This is required for msi_notify()/msix_notify() which > * will write at the addresses via spapr_msi_write(). > */ > -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, > - bool msix, unsigned req_num) > +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, > + unsigned first_irq, unsigned req_num) > { > unsigned i; > - MSIMessage msg = { .address = addr, .data = 0 }; > + MSIMessage msg = { .address = addr, .data = first_irq }; > > if (!msix) { > msi_set_message(pdev, msg); > @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, > return; > } > > - for (i = 0; i < req_num; ++i) { > - msg.address = addr | (i << 2); > + for (i = 0; i < req_num; ++i, ++msg.data) { > msix_set_message(pdev, i, msg); > trace_spapr_pci_msi_setup(pdev->name, i, msg.address); > } > @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > > /* There is no cached config, allocate MSIs */ > if (!phb->msi_table[ndev].nvec) { > - irq = spapr_allocate_irq_block(req_num, false); > + irq = spapr_allocate_irq_block(req_num, false, > + ret_intr_type == RTAS_TYPE_MSI); > if (irq < 0) { > fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev); > rtas_st(rets, 0, -1); /* Hardware error */ > @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > } > > /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ > - spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16), > - ret_intr_type == RTAS_TYPE_MSIX, req_num); > + spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == > RTAS_TYPE_MSIX, > + phb->msi_table[ndev].irq, req_num); > > rtas_st(rets, 0, 0); > rtas_st(rets, 1, req_num); > @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = { > static void spapr_msi_write(void *opaque, hwaddr addr, > uint64_t data, unsigned size) > { > - sPAPRPHBState *phb = opaque; > - int ndev = addr >> 16; > - int vec = ((addr & 0xFFFF) >> 2) | data; > - uint32_t irq = phb->msi_table[ndev].irq + vec; > + uint32_t irq = data; > > trace_spapr_pci_msi_write(addr, data, irq); > > @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = { > .endianness = DEVICE_LITTLE_ENDIAN > }; > > +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr) > +{ > + /* > + * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, > + * we need to allocate some memory to catch those writes coming > + * from msi_notify()/msix_notify(). > + * As MSIMessage:addr is going to be the same and MSIMessage:data > + * is going to be a VIRQ number, 4 bytes of the MSI MR will only > + * be used. > + */ > + spapr->msi_win_addr = addr; > + memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr, > + "msi", getpagesize()); > + memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr, > + &spapr->msiwindow); > +} > + > /* > * PHB PCI device > */ > @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s) > > if ((sphb->buid != -1) || (sphb->dma_liobn != -1) > || (sphb->mem_win_addr != -1) > - || (sphb->io_win_addr != -1) > - || (sphb->msi_win_addr != -1)) { > + || (sphb->io_win_addr != -1)) { > fprintf(stderr, "Either \"index\" or other parameters must" > " be specified for PAPR PHB, not both\n"); > return -1; > @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s) > + sphb->index * SPAPR_PCI_WINDOW_SPACING; > sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF; > sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF; > - sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF; > } > > if (sphb->buid == -1) { > @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s) > return -1; > } > > - if (sphb->msi_win_addr == -1) { > - fprintf(stderr, "MSI window address not specified for PHB\n"); > - return -1; > - } > - > if (find_phb(spapr, sphb->buid)) { > fprintf(stderr, "PCI host bridges must have unique BUIDs\n"); > return -1; > @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s) > namebuf, SPAPR_PCI_IO_WIN_SIZE); > memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, > &sphb->iowindow); > - > - /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, > - * we need to allocate some memory to catch those writes coming > - * from msi_notify()/msix_notify() */ > - if (msi_supported) { > - sprintf(namebuf, "%s.msi", sphb->dtbusname); > - memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), > &spapr_msi_ops, sphb, > - namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000); > - memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr, > - &sphb->msiwindow); > - } > - > /* > * Selecting a busname is more complex than you'd think, due to > * interacting constraints. If the user has specified an id > @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = { > DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1), > DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size, > SPAPR_PCI_IO_WIN_SIZE), > - DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1), > DEFINE_PROP_END_OF_LIST(), > }; > > @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = { > VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState), > VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState), > VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState), > - VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState), > VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, > vmstate_spapr_pci_lsi, struct spapr_pci_lsi), > VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, > 0, > diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h > index 93f9511..970b4a9 100644 > --- a/include/hw/pci-host/spapr.h > +++ b/include/hw/pci-host/spapr.h > @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState { > > MemoryRegion memspace, iospace; > hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; > - hwaddr msi_win_addr; > - MemoryRegion memwindow, iowindow, msiwindow; > + MemoryRegion memwindow, iowindow; > > uint32_t dma_liobn; > uint64_t dma_window_start; > @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState { > #define SPAPR_PCI_MMIO_WIN_SIZE 0x20000000 > #define SPAPR_PCI_IO_WIN_OFF 0x80000000 > #define SPAPR_PCI_IO_WIN_SIZE 0x10000 > -#define SPAPR_PCI_MSI_WIN_OFF 0x90000000 > + > +#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL > > #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL > > @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > uint32_t xics_phandle, > void *fdt); > > +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr); > + > void spapr_pci_rtas_init(void); > > #endif /* __HW_SPAPR_PCI_H__ */ > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 7aa9a3d..aed02e1 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -14,6 +14,8 @@ struct icp_state; > typedef struct sPAPREnvironment { > struct VIOsPAPRBus *vio_bus; > QLIST_HEAD(, sPAPRPHBState) phbs; > + hwaddr msi_win_addr; > + MemoryRegion msiwindow; > struct sPAPRNVRAM *nvram; > struct icp_state *icp; > > @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, > target_ulong opcode, > target_ulong *args); > > int spapr_allocate_irq(int hint, bool lsi); > -int spapr_allocate_irq_block(int num, bool lsi); > +int spapr_allocate_irq_block(int num, bool lsi, bool msi); > > static inline int spapr_allocate_msi(int hint) > { > -- Alexey