On 04/26/2018 05:28 AM, David Gibson wrote: > On Tue, Apr 24, 2018 at 10:11:27AM +0200, Cédric Le Goater wrote: >> On 04/24/2018 08:41 AM, David Gibson wrote: >>> On Mon, Apr 23, 2018 at 09:31:24AM +0200, Cédric Le Goater wrote: >>>> On 04/23/2018 08:44 AM, David Gibson wrote: >>>>> On Thu, Apr 19, 2018 at 02:42:58PM +0200, Cédric Le Goater wrote: >>>>>> The 'sent' status of the LSI interrupt source is modeled with the 'P' >>>>>> bit of the ESB and the assertion status of the source is maintained in >>>>>> an array under the main sPAPRXive object. The type of the source is >>>>>> stored in the same array for practical reasons. >>>>>> >>>>>> Signed-off-by: Cédric Le Goater <c...@kaod.org> >>>>>> --- >>>>>> hw/intc/xive.c | 54 >>>>>> +++++++++++++++++++++++++++++++++++++++++++++++---- >>>>>> include/hw/ppc/xive.h | 16 +++++++++++++++ >>>>>> 2 files changed, 66 insertions(+), 4 deletions(-) >>>>>> >>>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >>>>>> index c70578759d02..060976077dd7 100644 >>>>>> --- a/hw/intc/xive.c >>>>>> +++ b/hw/intc/xive.c >>>>>> @@ -104,6 +104,21 @@ static void xive_source_notify(XiveSource *xsrc, >>>>>> int srcno) >>>>>> >>>>>> } >>>>>> >>>>>> +/* >>>>>> + * LSI interrupt sources use the P bit and a custom assertion flag >>>>>> + */ >>>>>> +static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno) >>>>>> +{ >>>>>> + uint8_t old_pq = xive_source_pq_get(xsrc, srcno); >>>>>> + >>>>>> + if (old_pq == XIVE_ESB_RESET && >>>>>> + xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { >>>>>> + xive_source_pq_set(xsrc, srcno, XIVE_ESB_PENDING); >>>>>> + return true; >>>>>> + } >>>>>> + return false; >>>>>> +} >>>>>> + >>>>>> /* In a two pages ESB MMIO setting, even page is the trigger page, odd >>>>>> * page is for management */ >>>>>> static inline bool xive_source_is_trigger_page(hwaddr addr) >>>>>> @@ -133,6 +148,13 @@ static uint64_t xive_source_esb_read(void *opaque, >>>>>> hwaddr addr, unsigned size) >>>>>> */ >>>>>> ret = xive_source_pq_eoi(xsrc, srcno); >>>>>> >>>>>> + /* If the LSI source is still asserted, forward a new source >>>>>> + * event notification */ >>>>>> + if (xive_source_irq_is_lsi(xsrc, srcno)) { >>>>>> + if (xive_source_lsi_trigger(xsrc, srcno)) { >>>>>> + xive_source_notify(xsrc, srcno); >>>>>> + } >>>>>> + } >>>>>> break; >>>>>> >>>>>> case XIVE_ESB_GET: >>>>>> @@ -183,6 +205,14 @@ static void xive_source_esb_write(void *opaque, >>>>>> hwaddr addr, >>>>>> * notification >>>>>> */ >>>>>> notify = xive_source_pq_eoi(xsrc, srcno); >>>>>> + >>>>>> + /* LSI sources do not set the Q bit but they can still be >>>>>> + * asserted, in which case we should forward a new source >>>>>> + * event notification >>>>>> + */ >>>>>> + if (xive_source_irq_is_lsi(xsrc, srcno)) { >>>>>> + notify = xive_source_lsi_trigger(xsrc, srcno); >>>>>> + } >>>> >>>> FYI, I have moved that common test under xive_source_pq_eoi() >>> >>> Ok. >>> >>>>>> break; >>>>>> >>>>>> default: >>>>>> @@ -216,8 +246,17 @@ static void xive_source_set_irq(void *opaque, int >>>>>> srcno, int val) >>>>>> XiveSource *xsrc = XIVE_SOURCE(opaque); >>>>>> bool notify = false; >>>>>> >>>>>> - if (val) { >>>>>> - notify = xive_source_pq_trigger(xsrc, srcno); >>>>>> + if (xive_source_irq_is_lsi(xsrc, srcno)) { >>>>>> + if (val) { >>>>>> + xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; >>>>>> + } else { >>>>>> + xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; >>>>>> + } >>>>>> + notify = xive_source_lsi_trigger(xsrc, srcno); >>>>>> + } else { >>>>>> + if (val) { >>>>>> + notify = xive_source_pq_trigger(xsrc, srcno); >>>>>> + } >>>>>> } >>>>>> >>>>>> /* Forward the source event notification for routing */ >>>>>> @@ -234,13 +273,13 @@ void xive_source_pic_print_info(XiveSource *xsrc, >>>>>> Monitor *mon) >>>>>> xsrc->offset, xsrc->offset + xsrc->nr_irqs - 1); >>>>>> for (i = 0; i < xsrc->nr_irqs; i++) { >>>>>> uint8_t pq = xive_source_pq_get(xsrc, i); >>>>>> - uint32_t lisn = i + xsrc->offset; >>>>>> >>>>>> if (pq == XIVE_ESB_OFF) { >>>>>> continue; >>>>>> } >>>>>> >>>>>> - monitor_printf(mon, " %4x %c%c\n", lisn, >>>>>> + monitor_printf(mon, " %4x %s %c%c\n", i + xsrc->offset, >>>>>> + xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", >>>>>> pq & XIVE_ESB_VAL_P ? 'P' : '-', >>>>>> pq & XIVE_ESB_VAL_Q ? 'Q' : '-'); >>>>>> } >>>>>> @@ -249,6 +288,12 @@ void xive_source_pic_print_info(XiveSource *xsrc, >>>>>> Monitor *mon) >>>>>> static void xive_source_reset(DeviceState *dev) >>>>>> { >>>>>> XiveSource *xsrc = XIVE_SOURCE(dev); >>>>>> + int i; >>>>>> + >>>>>> + /* Keep the IRQ type */ >>>>>> + for (i = 0; i < xsrc->nr_irqs; i++) { >>>>>> + xsrc->status[i] &= ~XIVE_STATUS_ASSERTED; >>>>>> + } >>>>>> >>>>>> /* SBEs are initialized to 0b01 which corresponds to "ints off" */ >>>>>> memset(xsrc->sbe, 0x55, xsrc->sbe_size); >>>>>> @@ -273,6 +318,7 @@ static void xive_source_realize(DeviceState *dev, >>>>>> Error **errp) >>>>>> >>>>>> xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, >>>>>> xsrc->nr_irqs); >>>>>> + xsrc->status = g_malloc0(xsrc->nr_irqs); >>>>>> >>>>>> /* Allocate the SBEs (State Bit Entry). 2 bits, so 4 entries per >>>>>> byte */ >>>>>> xsrc->sbe_size = DIV_ROUND_UP(xsrc->nr_irqs, 4); >>>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h >>>>>> index d92a50519edf..0b76dd278d9b 100644 >>>>>> --- a/include/hw/ppc/xive.h >>>>>> +++ b/include/hw/ppc/xive.h >>>>>> @@ -33,6 +33,9 @@ typedef struct XiveSource { >>>>>> uint32_t nr_irqs; >>>>>> uint32_t offset; >>>>>> qemu_irq *qirqs; >>>>>> +#define XIVE_STATUS_LSI 0x1 >>>>>> +#define XIVE_STATUS_ASSERTED 0x2 >>>>>> + uint8_t *status; >>>>> >>>>> I don't love the idea of mixing configuration information (STATUS_LSI) >>>>> with runtime state information (ASSERTED) in the same field. Any >>>>> reason not to have these as parallel bitmaps. >>>> >>>> none. I can change that. >>> >>> Ok. >>> >>>>> Come to that.. is there a compelling reason to allow any individual >>>>> irq to be marked LSI or MSI, rather than using separate XiveSource >>>>> objects for MSIs and LSIs? >>>> >>>> yes. I would have preferred two distinct interrupt source objects but >>>> this is to be compatible with XICS, which uses only one. If we want >>>> to be able to change interrupt mode, the IRQ number space should be >>>> organized in the exact same way. Or we should change XICS also. >>>> >>>> Also, the change (a bitmap) is really small. >>> >>> Hrm, but since XIVE supports thousands of irqs, it could be quite a >>> large bitmap. >> >> Yes. The change is small, not the bitmap. >> >>> It's not impossible - in fact, not really even that hard - to change >>> the existing irq layout on xics. It does need a new machine type >>> variant, of course. >> >> I did some work on that topic a while ago : >> >> https://patchwork.ozlabs.org/cover/836782/ >> >> But we stopped exploring the idea. May be it was not the good approach. >> The PHBs LSIs would benefit from such a split though. > > So, no, I don't think that was a good approach, but that doesn't mean > other ways of rearranging the irq numbers aren't ok. The thing here > is that we don't want to think of an "irq allocator" - there are some > bits like that in there already, but they were always a mistake. > > We have lots of irq space (both XICS and XIVE) so instead we should > come up with a static mapping of irqs to devices.
yes. I would prefer that also. We could change the spapr_irq_alloc() routine to get a block of IRQs in the range defined for a device family, and use a device id to offset in that family range ? Here are some figures : device family block size max devices EVENT_CLASS_EPOW 1 1 EVENT_CLASS_HOT_PLUG 1 1 VIO_VSCSI 1 10 VIO_LLAN 1 10 VIO_VTY 1 5 PCI/PHB 1024 5 C. >>>>>> /* PQ bits */ >>>>>> uint8_t *sbe; >>>>> >>>>> .. and come to that is there a reason to keep the ASSERTED bit in a >>>>> separate array from sbe? AFAICT the actual 2-bit-per-irq layout is >>>>> never exposed to the guests. >>>> >>>> indeed. we always use the xive_source_pq_get/set() helpers to >>>> manipulate the PQ bits. So we could add an extra bit for the ASSERT >>>> without too much changes. Could also we put the type there or would >>>> you still prefer a bitmap ? >>> >>> I'd prefer the type (config information) be separate from the P, Q, >>> ASSERTED bits (state information). >> >> ok. So I will use the 'uint8_t *status' for P, Q, ASSERT, which leaves >> 5 bits available, but I don't think it is really worth the pain to >> optimize the size. > > Sure. I don't really care if it's packed or not. > >> The sbe array will disappear and we will have >> a bitmap for the type. > > We may or may not keep the type bitmap based on the discussion above, > but in any case this is a good step forward. > >> >> Thanks, >> >> C. >> >>>>> Or, even re-use the Q bit for asserted in LSIs (but report it as >>>>> always 0 in the register read/write path). >>>> >>>> I would prefer to add extra status bits. It is easier to debug. >>>> >>>> Thanks, >>>> >>>> C. >>>> >>>>>> @@ -127,4 +130,17 @@ uint8_t xive_source_pq_set(XiveSource *xsrc, >>>>>> uint32_t srcno, uint8_t pq); >>>>>> >>>>>> void xive_source_pic_print_info(XiveSource *xsrc, Monitor *mon); >>>>>> >>>>>> +static inline bool xive_source_irq_is_lsi(XiveSource *xsrc, uint32_t >>>>>> srcno) >>>>>> +{ >>>>>> + assert(srcno < xsrc->nr_irqs); >>>>>> + return xsrc->status[srcno] & XIVE_STATUS_LSI; >>>>>> +} >>>>>> + >>>>>> +static inline void xive_source_irq_set(XiveSource *xsrc, uint32_t srcno, >>>>>> + bool lsi) >>>>>> +{ >>>>>> + assert(srcno < xsrc->nr_irqs); >>>>>> + xsrc->status[srcno] |= lsi ? XIVE_STATUS_LSI : 0; >>>>>> +} >>>>>> + >>>>>> #endif /* PPC_XIVE_H */ >>>>> >>>> >>> >> >