On Fri, Apr 20, 2018 at 10:27:21AM +0200, Cédric Le Goater wrote: > On 04/20/2018 09:10 AM, David Gibson wrote: > > On Thu, Apr 19, 2018 at 02:42:57PM +0200, Cédric Le Goater wrote: > >> Each XIVE interrupt source is associated with a two bit state machine > >> called an Event State Buffer (ESB) : the first bit "P" means that an > >> interrupt is "pending" and waiting for an EOI and the bit "Q" (queued) > >> means a new interrupt was triggered while another was still pending. > >> > >> When an event is triggered, the associated interrupt state bits are > >> fetched and modified and forwarded to the virtualization engine of the > >> controller doing the routing. These can also be controlled by MMIO, to > >> trigger events or turn off the sources for instance. See code for more > >> details on the states and transitions. > >> > >> On a sPAPR machine, the OS will obtain the address of the MMIO page of > >> the ESB entry associated with a source and its characteristic using > >> the H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is > >> used. > >> > >> The xive_source_notify() routine is in charge forwarding the source > >> event notification to the routing engine. It will be filled later on. > >> > >> Signed-off-by: Cédric Le Goater <c...@kaod.org> > >> --- > >> Changes since v2: > >> > >> - added support for Store EOI > >> - added support for two page MMIO setting like on KVM > > > > Looks generally sane to me, though I have a few queries. > > > >> > >> default-configs/ppc64-softmmu.mak | 1 + > >> hw/intc/Makefile.objs | 1 + > >> hw/intc/xive.c | 335 > >> ++++++++++++++++++++++++++++++++++++++ > >> include/hw/ppc/xive.h | 130 +++++++++++++++ > >> 4 files changed, 467 insertions(+) > >> create mode 100644 hw/intc/xive.c > >> create mode 100644 include/hw/ppc/xive.h > >> > >> diff --git a/default-configs/ppc64-softmmu.mak > >> b/default-configs/ppc64-softmmu.mak > >> index b94af6c7c62a..c6d13e757977 100644 > >> --- a/default-configs/ppc64-softmmu.mak > >> +++ b/default-configs/ppc64-softmmu.mak > >> @@ -16,4 +16,5 @@ CONFIG_VIRTIO_VGA=y > >> CONFIG_XICS=$(CONFIG_PSERIES) > >> CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) > >> CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) > >> +CONFIG_XIVE=$(CONFIG_PSERIES) > >> CONFIG_MEM_HOTPLUG=y > >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs > >> index 0e9963f5eecc..72a46ed91c31 100644 > >> --- a/hw/intc/Makefile.objs > >> +++ b/hw/intc/Makefile.objs > >> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o > >> obj-$(CONFIG_XICS) += xics.o > >> obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o > >> obj-$(CONFIG_XICS_KVM) += xics_kvm.o > >> +obj-$(CONFIG_XIVE) += xive.o > >> obj-$(CONFIG_POWERNV) += xics_pnv.o > >> obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o > >> obj-$(CONFIG_S390_FLIC) += s390_flic.o > >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c > >> new file mode 100644 > >> index 000000000000..c70578759d02 > >> --- /dev/null > >> +++ b/hw/intc/xive.c > >> @@ -0,0 +1,335 @@ > >> +/* > >> + * QEMU PowerPC XIVE interrupt controller model > >> + * > >> + * Copyright (c) 2017-2018, IBM Corporation. > >> + * > >> + * This code is licensed under the GPL version 2 or later. See the > >> + * COPYING file in the top-level directory. > >> + */ > >> + > >> +#include "qemu/osdep.h" > >> +#include "qemu/log.h" > >> +#include "qapi/error.h" > >> +#include "target/ppc/cpu.h" > >> +#include "sysemu/cpus.h" > >> +#include "sysemu/dma.h" > >> +#include "monitor/monitor.h" > >> +#include "hw/ppc/xive.h" > >> + > >> +/* > >> + * XIVE Interrupt Source > >> + */ > >> + > >> +uint8_t xive_source_pq_get(XiveSource *xsrc, uint32_t srcno) > >> +{ > >> + uint32_t byte = srcno / 4; > >> + uint32_t bit = (srcno % 4) * 2; > >> + > >> + assert(byte < xsrc->sbe_size); > >> + > >> + return (xsrc->sbe[byte] >> bit) & 0x3; > >> +} > >> + > >> +uint8_t xive_source_pq_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq) > >> +{ > >> + uint32_t byte = srcno / 4; > >> + uint32_t bit = (srcno % 4) * 2; > >> + uint8_t old, new; > >> + > >> + assert(byte < xsrc->sbe_size); > >> + > >> + old = xsrc->sbe[byte]; > >> + > >> + new = xsrc->sbe[byte] & ~(0x3 << bit); > >> + new |= (pq & 0x3) << bit; > >> + > >> + xsrc->sbe[byte] = new; > >> + > >> + return (old >> bit) & 0x3; > >> +} > >> + > >> +static bool xive_source_pq_eoi(XiveSource *xsrc, uint32_t srcno) > >> +{ > >> + uint8_t old_pq = xive_source_pq_get(xsrc, srcno); > >> + > >> + switch (old_pq) { > >> + case XIVE_ESB_RESET: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET); > >> + return false; > >> + case XIVE_ESB_PENDING: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_RESET); > >> + return false; > >> + case XIVE_ESB_QUEUED: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING); > >> + return true; > >> + case XIVE_ESB_OFF: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF); > >> + return false; > >> + default: > >> + g_assert_not_reached(); > >> + } > >> +} > >> + > >> +/* > >> + * Returns whether the event notification should be forwarded. > >> + */ > >> +static bool xive_source_pq_trigger(XiveSource *xsrc, uint32_t srcno) > >> +{ > >> + uint8_t old_pq = xive_source_pq_get(xsrc, srcno); > >> + > >> + switch (old_pq) { > >> + case XIVE_ESB_RESET: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING); > >> + return true; > >> + case XIVE_ESB_PENDING: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED); > >> + return false; > >> + case XIVE_ESB_QUEUED: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_QUEUED); > >> + return false; > >> + case XIVE_ESB_OFF: > >> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_OFF); > >> + return false; > >> + default: > >> + g_assert_not_reached(); > >> + } > >> +} > >> + > >> +/* > >> + * Forward the source event notification to the associated XiveFabric, > >> + * the device owning the sources. > >> + */ > >> +static void xive_source_notify(XiveSource *xsrc, int srcno) > >> +{ > >> + > >> +} > >> + > >> +/* In a two pages ESB MMIO setting, even page is the trigger page, odd > >> + * page is for management */ > > > > Can I understand from this that the result from this function is only > > meaningful in 2-pages mode? > > yes. May be I should rename it to xive_source_is_even_page() ?
Seems very long winded. Maybe keep the name but have it check both whether it's an even page and also whether you're in 2pages mode. > >> +static inline bool xive_source_is_trigger_page(hwaddr addr) > >> +{ > >> + return !((addr >> 16) & 1); > > > > Later on you seem to have both 4k and 64k variants list, but here you > > hardcode 64k. Is that a problem? > > oups. This should be : > > (addr >> (xsrc->esb_shift - 1)) > > I did the tests with the spapr guest which uses 64k ESB MMIO pages. > The check is only significant in a 2 pages setting. Ok. > >> +} > >> + > >> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned > >> size) > >> +{ > >> + XiveSource *xsrc = XIVE_SOURCE(opaque); > >> + uint32_t offset = addr & 0xF00; > > > > You ignore the low bits of the address entirely, so effective you have > > a 256 byte range that's all aliases of the same register. Is that > > intentional? > > yes but it's not entirely correct. The exact ranges are : > > Load Store > > 0x000 .. 0x3FF EOI and return 0|1 Trigger > 0x400 .. 0x7FF EOI and return 0|1 EOI > 0x800 .. 0xBFF return PQ undefined > 0xC00 .. 0xCFF return PQ and PQ=00 PQ=00 > 0xD00 .. 0xDFF return PQ and PQ=01 PQ=01 > 0xE00 .. 0xDFF return PQ and PQ=10 PQ=10 > 0xF00 .. 0xDFF return PQ and PQ=11 PQ=11 > > There is room for some improvements. > > The trigger page in a two pages ESB MMIO settings only triggers on stores. Ok. > >> + uint32_t srcno = addr >> xsrc->esb_shift; > >> + uint64_t ret = -1; > >> + > >> + if (xive_source_esb_2page(xsrc) && xive_source_is_trigger_page(addr)) > >> { > >> + qemu_log_mask(LOG_GUEST_ERROR, > >> + "XIVE: invalid load on IRQ %d trigger page at " > >> + "0x%"HWADDR_PRIx"\n", srcno, addr); > >> + return -1; > >> + } > >> + > >> + switch (offset) { > >> + case XIVE_ESB_LOAD_EOI: > >> + /* > >> + * Load EOI is not the default source setting under QEMU, but > >> + * this is what HW uses currently. > >> + */ > >> + ret = xive_source_pq_eoi(xsrc, srcno); > > > > You're implicitly casting a bool return value into a u64 here, is that > > intentional? > > yes. is that bad ? This is what the load is supposed to return in the > transition > algo. No, that's fine, as long as just using the LSB in your return is correct. Just making sure I understood. > >> + > >> + break; > >> + > >> + case XIVE_ESB_GET: > >> + ret = xive_source_pq_get(xsrc, srcno); > >> + break; > >> + > >> + case XIVE_ESB_SET_PQ_00: > >> + case XIVE_ESB_SET_PQ_01: > >> + case XIVE_ESB_SET_PQ_10: > >> + case XIVE_ESB_SET_PQ_11: > >> + ret = xive_source_pq_set(xsrc, srcno, (offset >> 8) & 0x3); > >> + break; > >> + default: > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB addr %d\n", > >> offset); > >> + } > >> + > >> + return ret; > >> +} > >> + > >> +static void xive_source_esb_write(void *opaque, hwaddr addr, > >> + uint64_t value, unsigned size) > >> +{ > >> + XiveSource *xsrc = XIVE_SOURCE(opaque); > >> + uint32_t offset = addr & 0xF00; > >> + uint32_t srcno = addr >> xsrc->esb_shift; > >> + bool notify = false; > >> + > >> + switch (offset) { > >> + case 0: > >> + notify = xive_source_pq_trigger(xsrc, srcno); > >> + break; > >> + > >> + case XIVE_ESB_STORE_EOI: > >> + if (xive_source_is_trigger_page(addr)) { > >> + qemu_log_mask(LOG_GUEST_ERROR, > >> + "XIVE: invalid store on IRQ %d trigger page at " > >> + "0x%"HWADDR_PRIx"\n", srcno, addr); > >> + return; > >> + } > >> + > >> + if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) { > >> + qemu_log_mask(LOG_GUEST_ERROR, > >> + "XIVE: invalid Store EOI for IRQ %d\n", srcno); > >> + return; > >> + } > >> + > >> + /* If the Q bit is set, we should forward a new source event > >> + * notification > >> + */ > >> + notify = xive_source_pq_eoi(xsrc, srcno); > >> + break; > >> + > >> + default: > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr > >> %d\n", > >> + offset); > >> + return; > >> + } > >> + > >> + /* Forward the source event notification for routing */ > >> + if (notify) { > >> + xive_source_notify(xsrc, srcno); > >> + } > > > > EOI via this path calls notify, but the one via the read path > > doesn't. Is that correct? > > No. I have given attention to the one page ESB MMIO setting + Store EOI > in the emulated mode and not enough to the two pages ESB MMIO setting. > This is a late change to be compatible with KVM. I will fix. Ok. > > > >> +} > >> + > >> +static const MemoryRegionOps xive_source_esb_ops = { > >> + .read = xive_source_esb_read, > >> + .write = xive_source_esb_write, > >> + .endianness = DEVICE_BIG_ENDIAN, > >> + .valid = { > >> + .min_access_size = 8, > >> + .max_access_size = 8, > >> + }, > >> + .impl = { > >> + .min_access_size = 8, > >> + .max_access_size = 8, > >> + }, > >> +}; > >> + > >> +static void xive_source_set_irq(void *opaque, int srcno, int val) > >> +{ > >> + XiveSource *xsrc = XIVE_SOURCE(opaque); > >> + bool notify = false; > >> + > >> + if (val) { > >> + notify = xive_source_pq_trigger(xsrc, srcno); > >> + } > >> + > >> + /* Forward the source event notification for routing */ > >> + if (notify) { > >> + xive_source_notify(xsrc, srcno); > >> + } > >> +} > >> + > >> +void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon) > >> +{ > >> + int i; > >> + > >> + monitor_printf(mon, "XIVE Source %6x ..%6x\n", > >> + xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1); > >> + for (i = 0; i < xsrc->nr_irqs; i++) { > >> + uint8_t pq = xive_source_pq_get(xsrc, i); > >> + uint32_t lisn = i + xsrc->offset; > >> + > >> + if (pq == XIVE_ESB_OFF) { > >> + continue; > >> + } > >> + > >> + monitor_printf(mon, " %4x %c%c\n", lisn, > >> + pq & XIVE_ESB_VAL_P ? 'P' : '-', > >> + pq & XIVE_ESB_VAL_Q ? 'Q' : '-'); > >> + } > >> +} > >> + > >> +static void xive_source_reset(DeviceState *dev) > >> +{ > >> + XiveSource *xsrc = XIVE_SOURCE(dev); > >> + > >> + /* SBEs are initialized to 0b01 which corresponds to "ints off" */ > >> + memset(xsrc->sbe, 0x55, xsrc->sbe_size); > >> +} > >> + > >> +static void xive_source_realize(DeviceState *dev, Error **errp) > >> +{ > >> + XiveSource *xsrc = XIVE_SOURCE(dev); > >> + > >> + if (!xsrc->nr_irqs) { > >> + error_setg(errp, "Number of interrupt needs to be greater than > >> 0"); > >> + return; > >> + } > >> + > >> + if (xsrc->esb_shift != XIVE_ESB_4K && > >> + xsrc->esb_shift != XIVE_ESB_4K_2PAGE && > >> + xsrc->esb_shift != XIVE_ESB_64K && > >> + xsrc->esb_shift != XIVE_ESB_64K_2PAGE) { > >> + error_setg(errp, "Invalid ESB shift setting"); > >> + return; > >> + } > >> + > >> + xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, > >> + xsrc->nr_irqs); > >> + > >> + /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per byte > >> */ > >> + xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4); > >> + xsrc->sbe = g_malloc0(xsrc->sbe_size); > >> + > >> + /* TODO: H_INT_ESB support, which removing the ESB MMIOs */ > >> + > >> + memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), > >> + &xive_source_esb_ops, xsrc, "xive.esb", > >> + (1ull << xsrc->esb_shift) * xsrc->nr_irqs); > >> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio); > >> +} > >> + > >> +static const VMStateDescription vmstate_xive_source = { > >> + .name = TYPE_XIVE_SOURCE, > >> + .version_id = 1, > >> + .minimum_version_id = 1, > >> + .fields = (VMStateField[]) { > >> + VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL), > >> + VMSTATE_VBUFFER_UINT32(sbe, XiveSource, 1, NULL, sbe_size), > >> + VMSTATE_END_OF_LIST() > >> + }, > >> +}; > >> + > >> +/* > >> + * The default XIVE interrupt source setting for ESB MMIO is two 64k > >> + * pages without Store EOI. This is in sync with KVM. > >> + */ > >> +static Property xive_source_properties[] = { > >> + DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0), > >> + DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0), > >> + DEFINE_PROP_UINT64("bar", XiveSource, esb_base, 0), > > > > Isn't this redundant with however the base address is handled through > > the SysBusDevice stuff (I forget the details)? > > Storing the ESB MMIO base address under the XiveSource object is > convenient later on in the h_int_get_source_info hcall which make > use of the helpers : > > hwaddr xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno) > hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) > > But it is only used in that place. So we could just store the ESB > MMIO base address under the sPAPRXive controller. This makes some > sense in the design, as we have to inform KVM of this address with > a KVM device ioctl. Well.. I really dislike the idea that you could change the actual MMIO mapping address in one place, but other bits of code would still think it was mapped somewhere else. > >> + DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, > >> XIVE_ESB_64K_2PAGE), > >> + DEFINE_PROP_END_OF_LIST(), > >> +}; > >> + > >> +static void xive_source_class_init(ObjectClass *klass, void *data) > >> +{ > >> + DeviceClass *dc = DEVICE_CLASS(klass); > >> + > >> + dc->realize = xive_source_realize; > >> + dc->reset = xive_source_reset; > >> + dc->props = xive_source_properties; > >> + dc->desc = "XIVE interrupt source"; > >> + dc->vmsd = &vmstate_xive_source; > >> +} > >> + > >> +static const TypeInfo xive_source_info = { > >> + .name = TYPE_XIVE_SOURCE, > >> + .parent = TYPE_SYS_BUS_DEVICE, > >> + .instance_size = sizeof(XiveSource), > >> + .class_init = xive_source_class_init, > >> +}; > >> + > >> +static void xive_register_types(void) > >> +{ > >> + type_register_static(&xive_source_info); > >> +} > >> + > >> +type_init(xive_register_types) > >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h > >> new file mode 100644 > >> index 000000000000..d92a50519edf > >> --- /dev/null > >> +++ b/include/hw/ppc/xive.h > >> @@ -0,0 +1,130 @@ > >> +/* > >> + * QEMU PowerPC XIVE interrupt controller model > >> + * > >> + * Copyright (c) 2017-2018, IBM Corporation. > >> + * > >> + * This code is licensed under the GPL version 2 or later. See the > >> + * COPYING file in the top-level directory. > >> + */ > >> + > >> +#ifndef PPC_XIVE_H > >> +#define PPC_XIVE_H > >> + > >> +#include "hw/sysbus.h" > >> + > >> +/* > >> + * XIVE Interrupt Source > >> + */ > >> + > >> +#define TYPE_XIVE_SOURCE "xive-source" > >> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE) > >> + > >> +/* > >> + * XIVE Source Interrupt source characteristics, which define how the > >> + * ESB are controlled. > >> + */ > >> +#define XIVE_SRC_H_INT_ESB 0x1 /* ESB managed with hcall H_INT_ESB */ > >> +#define XIVE_SRC_STORE_EOI 0x4 /* Store EOI supported */ > >> + > >> +typedef struct XiveSource { > >> + SysBusDevice parent; > >> + > >> + /* IRQs */ > >> + uint32_t nr_irqs; > >> + uint32_t offset; > >> + qemu_irq *qirqs; > >> + > >> + /* PQ bits */ > >> + uint8_t *sbe; > >> + uint32_t sbe_size; > >> + > >> + /* ESB memory region */ > >> + uint64_t esb_flags; > >> + hwaddr esb_base; > >> + uint32_t esb_shift; > >> + MemoryRegion esb_mmio; > >> +} XiveSource; > >> + > >> +/* > >> + * ESB MMIO setting. Can be one page, for both source triggering and > >> + * source management, or two different pages. See below for magic > >> + * values. > >> + */ > >> +#define XIVE_ESB_4K 12 /* PSI HB */ > >> +#define XIVE_ESB_4K_2PAGE 17 > > > > Should this be 13 instead of 17? > > oups. obviously this is not used. > > >> +#define XIVE_ESB_64K 16 > >> +#define XIVE_ESB_64K_2PAGE 17 > > > > (Also, who the hell comes up with a brand new PIC and decides to have > > *4* different interface variants. But that's not your problem, I > > realise). > > HW constraints on the different controllers which need to expose > sources : PSI, PHB4. The internal sources of the XIVE interrupt > controller can be configured to use 4K or 64K but I doubt the 4k > will be ever used. Sure, the hardware is different, but *why* is it different. This is a brand new design, you'd think they could come up with one variant that works for all the cases. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature