On 07/20/2018 04:18 AM, David Gibson wrote: > On Fri, Jul 06, 2018 at 11:07:11AM +0200, Cédric Le Goater wrote: >> This proposal introduces a new IRQ number space layout using static >> numbers for all devices, depending on a device index, and a bitmap >> allocator for the MSI IRQ numbers which are negotiated by the guest at >> runtime. >> >> As the VIO device model does not have a device index but a "reg" >> property, we introduce a formula to compute an IRQ number from a "reg" >> value. It should minimize most of the collisions. >> >> The previous layout is kept in pre-3.0 machines raising the >> 'legacy_irq_allocation' machine class flag. >> >> Signed-off-by: Cédric Le Goater <c...@kaod.org> >> --- >> The new XICS layout will only be activated when a new pseries-3.1 >> machine is introduced. > > Looking good. I've noted a few nits below, but they're very minor - > I wouldn't even bother to ask for the changes if we didn't have to > wait for 3.1 anyway.
Yes. np. I have taken into account the nits. We just need to agree on a reg_to_irq formula now. Thanks, C. > > >> >> include/hw/ppc/spapr.h | 5 ++++ >> include/hw/ppc/spapr_irq.h | 32 ++++++++++++++++++++++ >> hw/ppc/spapr.c | 32 +++++++++++++++++++++- >> hw/ppc/spapr_events.c | 12 ++++++-- >> hw/ppc/spapr_irq.c | 56 ++++++++++++++++++++++++++++++++++++++ >> hw/ppc/spapr_pci.c | 29 ++++++++++++++++---- >> hw/ppc/spapr_vio.c | 47 ++++++++++++++++++++++++++++---- >> hw/ppc/Makefile.objs | 2 +- >> 8 files changed, 199 insertions(+), 16 deletions(-) >> create mode 100644 include/hw/ppc/spapr_irq.h >> create mode 100644 hw/ppc/spapr_irq.c >> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 7e5de1a6fd42..73067f5ee8aa 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -8,6 +8,7 @@ >> #include "hw/ppc/spapr_drc.h" >> #include "hw/mem/pc-dimm.h" >> #include "hw/ppc/spapr_ovec.h" >> +#include "hw/ppc/spapr_irq.h" >> >> struct VIOsPAPRBus; >> struct sPAPRPHBState; >> @@ -101,6 +102,8 @@ struct sPAPRMachineClass { >> bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ >> bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ >> bool pre_2_10_has_unused_icps; >> + bool legacy_irq_allocation; >> + >> void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, >> uint64_t *buid, hwaddr *pio, >> hwaddr *mmio32, hwaddr *mmio64, >> @@ -167,6 +170,8 @@ struct sPAPRMachineState { >> char *kvm_type; >> >> const char *icp_type; >> + int32_t irq_map_nr; >> + unsigned long *irq_map; >> >> bool cmd_line_caps[SPAPR_CAP_NUM]; >> sPAPRCapabilities def, eff, mig; >> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h >> new file mode 100644 >> index 000000000000..6f7f50548809 >> --- /dev/null >> +++ b/include/hw/ppc/spapr_irq.h >> @@ -0,0 +1,32 @@ >> +/* >> + * QEMU PowerPC sPAPR IRQ backend definitions >> + * >> + * Copyright (c) 2018, IBM Corporation. 
>> + * >> + * This code is licensed under the GPL version 2 or later. See the >> + * COPYING file in the top-level directory. >> + */ >> + >> +#ifndef HW_SPAPR_IRQ_H >> +#define HW_SPAPR_IRQ_H >> + >> +/* >> + * IRQ range offsets per device type >> + */ >> +#define SPAPR_IRQ_EPOW 0x1000 /* XICS_IRQ_BASE offset */ >> +#define SPAPR_IRQ_HOTPLUG 0x1001 >> +#define SPAPR_IRQ_VIO 0x1100 /* 256 VIO devices */ >> +#define SPAPR_IRQ_PCI_LSI 0x1200 /* 32+ PHBs devices */ >> + >> +#define SPAPR_IRQ_MSI 0x1300 /* Offset of the dynamic range covered >> + * by the bitmap allocator */ >> + >> +typedef struct sPAPRMachineState sPAPRMachineState; >> + >> +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis); >> +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, >> + Error **errp); >> +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num); >> +void spapr_irq_msi_reset(sPAPRMachineState *spapr); >> + >> +#endif >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 3f5e1d3ec21a..7bc9a48abf04 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -189,6 +189,11 @@ static void xics_system_init(MachineState *machine, int >> nr_irqs, Error **errp) >> sPAPRMachineState *spapr = SPAPR_MACHINE(machine); >> Error *local_err = NULL; >> >> + /* Initialize the MSI IRQ allocator. */ >> + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + spapr_irq_msi_init(spapr, XICS_IRQ_BASE + nr_irqs - SPAPR_IRQ_MSI); >> + } >> + >> if (kvm_enabled()) { >> if (machine_kernel_irqchip_allowed(machine) && >> !xics_kvm_init(spapr, &local_err)) { >> @@ -1636,6 +1641,10 @@ static void spapr_machine_reset(void) >> ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); >> } >> >> + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + spapr_irq_msi_reset(spapr); >> + } >> + >> qemu_devices_reset(); >> >> /* DRC reset may cause a device to be unplugged. 
This will cause >> troubles >> @@ -1910,6 +1919,24 @@ static const VMStateDescription >> vmstate_spapr_patb_entry = { >> }, >> }; >> >> +static bool spapr_irq_map_needed(void *opaque) >> +{ >> + sPAPRMachineState *spapr = opaque; >> + >> + return spapr->irq_map && !bitmap_empty(spapr->irq_map, >> spapr->irq_map_nr); >> +} >> + >> +static const VMStateDescription vmstate_spapr_irq_map = { >> + .name = "spapr_irq_map", >> + .version_id = 1, >> + .minimum_version_id = 1, >> + .needed = spapr_irq_map_needed, >> + .fields = (VMStateField[]) { >> + VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr), >> + VMSTATE_END_OF_LIST() >> + }, >> +}; >> + >> static const VMStateDescription vmstate_spapr = { >> .name = "spapr", >> .version_id = 3, >> @@ -1937,6 +1964,7 @@ static const VMStateDescription vmstate_spapr = { >> &vmstate_spapr_cap_cfpc, >> &vmstate_spapr_cap_sbbc, >> &vmstate_spapr_cap_ibs, >> + &vmstate_spapr_irq_map, >> NULL >> } >> }; >> @@ -4067,7 +4095,9 @@ static void >> spapr_machine_3_0_instance_options(MachineState *machine) >> >> static void spapr_machine_3_0_class_options(MachineClass *mc) >> { >> - /* Defaults for the latest behaviour inherited from the base class */ >> + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); >> + >> + smc->legacy_irq_allocation = true; >> } >> >> DEFINE_SPAPR_MACHINE(3_0, "3.0", true); >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c >> index e4f5946a2188..cab950d33446 100644 >> --- a/hw/ppc/spapr_events.c >> +++ b/hw/ppc/spapr_events.c >> @@ -709,7 +709,11 @@ void spapr_events_init(sPAPRMachineState *spapr) >> { >> int epow_irq; >> >> - epow_irq = spapr_irq_findone(spapr, &error_fatal); >> + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + epow_irq = spapr_irq_findone(spapr, &error_fatal); >> + } else { >> + epow_irq = SPAPR_IRQ_EPOW; >> + } > > Nit: these can be rearranged more briefly by initializing to the fixed > value, then overriding in the legacy case. 
> >> >> spapr_irq_claim(spapr, epow_irq, false, &error_fatal); >> >> @@ -731,7 +735,11 @@ void spapr_events_init(sPAPRMachineState *spapr) >> if (spapr->use_hotplug_event_source) { >> int hp_irq; >> >> - hp_irq = spapr_irq_findone(spapr, &error_fatal); >> + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + hp_irq = spapr_irq_findone(spapr, &error_fatal); >> + } else { >> + hp_irq = SPAPR_IRQ_HOTPLUG; >> + } > > Here too. > >> spapr_irq_claim(spapr, hp_irq, false, &error_fatal); >> >> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c >> new file mode 100644 >> index 000000000000..24e9c1d4433c >> --- /dev/null >> +++ b/hw/ppc/spapr_irq.c >> @@ -0,0 +1,56 @@ >> +/* >> + * QEMU PowerPC sPAPR IRQ interface >> + * >> + * Copyright (c) 2018, IBM Corporation. >> + * >> + * This code is licensed under the GPL version 2 or later. See the >> + * COPYING file in the top-level directory. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "qemu/log.h" >> +#include "qemu/error-report.h" >> +#include "qapi/error.h" >> +#include "hw/ppc/spapr.h" >> +#include "hw/ppc/xics.h" >> + >> +void spapr_irq_msi_init(sPAPRMachineState *spapr, uint32_t nr_msis) >> +{ >> + spapr->irq_map_nr = nr_msis; >> + spapr->irq_map = bitmap_new(spapr->irq_map_nr); >> +} >> + >> +int spapr_irq_msi_alloc(sPAPRMachineState *spapr, uint32_t num, bool align, >> + Error **errp) >> +{ >> + int irq; >> + >> + /* >> + * The 'align_mask' parameter of bitmap_find_next_zero_area() >> + * should be one less than a power of 2; 0 means no >> + * alignment. 
Adapt the 'align' value of the former allocator >> + * to fit the requirements of bitmap_find_next_zero_area() >> + */ >> + align -= 1; >> + >> + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, >> num, >> + align); >> + if (irq == spapr->irq_map_nr) { >> + error_setg(errp, "can't find a free %d-IRQ block", num); >> + return -1; >> + } >> + >> + bitmap_set(spapr->irq_map, irq, num); >> + >> + return irq + SPAPR_IRQ_MSI; >> +} >> + >> +void spapr_irq_msi_free(sPAPRMachineState *spapr, int irq, uint32_t num) >> +{ >> + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); >> +} >> + >> +void spapr_irq_msi_reset(sPAPRMachineState *spapr) >> +{ >> + bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr); >> +} >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c >> index 497b896c7d24..cba5340f4bad 100644 >> --- a/hw/ppc/spapr_pci.c >> +++ b/hw/ppc/spapr_pci.c >> @@ -334,6 +334,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPRMachineState *spapr, >> return; >> } >> >> + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); >> + } >> spapr_irq_free(spapr, msi->first_irq, msi->num); >> if (msi_present(pdev)) { >> spapr_msi_setmsg(pdev, 0, false, 0, 0); >> @@ -372,7 +375,13 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPRMachineState *spapr, >> } >> >> /* Allocate MSIs */ >> - irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, >> &err); >> + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, >> + &err); >> + } else { >> + irq = spapr_irq_msi_alloc(spapr, req_num, >> + ret_intr_type == RTAS_TYPE_MSI, &err); >> + } >> if (err) { >> error_reportf_err(err, "Can't allocate MSIs for device %x: ", >> config_addr); >> @@ -392,6 +401,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, >> sPAPRMachineState *spapr, >> >> /* Release previous MSIs */ >> if (msi) { >> + if 
(!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); >> + } >> spapr_irq_free(spapr, msi->first_irq, msi->num); >> g_hash_table_remove(phb->msi, &config_addr); >> } >> @@ -1708,11 +1720,15 @@ static void spapr_phb_realize(DeviceState *dev, >> Error **errp) >> uint32_t irq; >> Error *local_err = NULL; >> >> - irq = spapr_irq_findone(spapr, &local_err); >> - if (local_err) { >> - error_propagate(errp, local_err); >> - error_prepend(errp, "can't allocate LSIs: "); >> - return; >> + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + irq = spapr_irq_findone(spapr, &local_err); >> + if (local_err) { >> + error_propagate(errp, local_err); >> + error_prepend(errp, "can't allocate LSIs: "); >> + return; >> + } >> + } else { >> + irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; > > Here again, you can use the new fixed value as the default. > >> } >> >> spapr_irq_claim(spapr, irq, true, &local_err); >> @@ -2123,6 +2139,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, >> _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); >> _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); >> _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); >> + /* TODO: fine tune the total count of allocatable MSIs per PHB */ >> _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", >> XICS_IRQS_SPAPR)); >> >> /* Dynamic DMA window */ >> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c >> index be9af71437cc..a0695200f348 100644 >> --- a/hw/ppc/spapr_vio.c >> +++ b/hw/ppc/spapr_vio.c >> @@ -37,12 +37,13 @@ >> >> #include "hw/ppc/spapr.h" >> #include "hw/ppc/spapr_vio.h" >> -#include "hw/ppc/xics.h" >> #include "hw/ppc/fdt.h" >> #include "trace.h" >> >> #include <libfdt.h> >> >> +#define SPAPR_VIO_REG_BASE 0x71000000 >> + >> static void spapr_vio_get_irq(Object *obj, Visitor *v, const char *name, >> void *opaque, Error **errp) >> { >> @@ -445,6 +446,32 @@ 
static void spapr_vio_busdev_reset(DeviceState *qdev) >> } >> } >> >> +/* >> + * The register property of a VIO device is defined in libvirt using a >> + * base number + 0x1000 increment and in QEMU by incrementing the base >> + * register number 0x71000000. >> + * >> + * The formula below tries to compute a unique index number from the >> + * register value that will be used to define the IRQ number of the >> + * VIO device. A maximum of 256 (0x100) VIO devices is covered. >> + * >> + * To minimize collisions, we define two distinct ranges depending on >> + * the "reg" value definition: >> + * >> + * [0x00 - 0x7f] user/libvirt >> + * [0x80 - 0xff] QEMU VIO model >> + * >> + * Collisions will be detected when the IRQ is claimed. >> + */ >> +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) >> +{ >> + if (reg >= SPAPR_VIO_REG_BASE) { >> + return SPAPR_IRQ_VIO | (reg & 0x7f) | 0x80; >> + } else { >> + return SPAPR_IRQ_VIO | ((reg & 0x7f) ^ ((reg >> 12) & 0x7f)); >> + } >> +} >> + >> static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) >> { >> sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> @@ -485,10 +512,18 @@ static void spapr_vio_busdev_realize(DeviceState >> *qdev, Error **errp) >> } >> >> if (!dev->irq) { >> - dev->irq = spapr_irq_findone(spapr, &local_err); >> - if (local_err) { >> - error_propagate(errp, local_err); >> - return; >> + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { >> + dev->irq = spapr_irq_findone(spapr, &local_err); >> + if (local_err) { >> + error_propagate(errp, local_err); >> + return; >> + } >> + } else { >> + dev->irq = spapr_vio_reg_to_irq(dev->reg); >> + if (dev->irq == SPAPR_IRQ_PCI_LSI) { >> + error_setg(errp, "Too many VIO devices"); >> + return; >> + } >> } >> } >> >> @@ -557,7 +592,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void) >> /* Create bus on bridge device */ >> qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); >> bus = SPAPR_VIO_BUS(qbus); >> - bus->next_reg =
0x71000000; >> + bus->next_reg = SPAPR_VIO_REG_BASE; >> >> /* hcall-vio */ >> spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal); >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs >> index bcab6323b7ed..4ab556467289 100644 >> --- a/hw/ppc/Makefile.objs >> +++ b/hw/ppc/Makefile.objs >> @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o >> obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o >> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o >> obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o >> -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o >> +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o >> # IBM PowerNV >> obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o >> pnv_occ.o pnv_bmc.o >> ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) >