On 5/22/25 4:46 PM, Jan Beulich wrote:
On 21.05.2025 18:03, Oleksii Kurochko wrote:
--- /dev/null
+++ b/xen/arch/riscv/imsic.c
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * xen/arch/riscv/imsic.c
+ *
+ * RISC-V Incoming MSI Controller support
+ *
+ * (c) Microchip Technology Inc.
+ * (c) Vates
+ */
+
+#include <xen/bitops.h>
+#include <xen/const.h>
+#include <xen/cpumask.h>
+#include <xen/device_tree.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/macros.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+#include <xen/xmalloc.h>
+
+#include <asm/imsic.h>
+
+static struct imsic_config imsic_cfg;
+
+ irq_range = xzalloc_array(uint32_t, *nr_parent_irqs * 2);
+ if ( !irq_range )
+ panic("%s: irq_range[] allocation failed\n", __func__);
+
+ if ( (rc = dt_property_read_u32_array(node, "interrupts-extended",
+ irq_range, *nr_parent_irqs * 2)) )
+ panic("%s: unable to find interrupt-extended in %s node: %d\n",
+ __func__, dt_node_full_name(node), rc);
+
+ if ( irq_range[1] == IRQ_M_EXT )
+ {
+ /* Machine mode imsic node, ignore it. */
+ rc = IRQ_M_EXT;
+ goto cleanup;
+ }
Wouldn't this better be done ...
+ /* Check that interrupts-extended property is well-formed. */
+ for ( unsigned int i = 2; i < (*nr_parent_irqs * 2); i += 2 )
+ {
+ if ( irq_range[i + 1] != irq_range[1] )
+ panic("%s: mode[%d] != %d\n", __func__, i + 1, irq_range[1]);
+ }
... after this consistency check?
My intention was: if the first|irq_range| isn't|IRQ_M_EXT|, then there's no
point in parsing the full|interrupts-extended| property.
However, on the other hand, it might be important to ensure that the
|interrupts-extended| property is properly formed.
So yes, it makes sense to move the check above, before the|for| loop.
Also %u please when you log unsigned values.
+ if ( !dt_property_read_u32(node, "riscv,guest-index-bits",
+ &imsic_cfg.guest_index_bits) )
+ imsic_cfg.guest_index_bits = 0;
+ tmp = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT;
+ if ( tmp < imsic_cfg.guest_index_bits )
+ {
+ printk(XENLOG_ERR "%s: guest index bits too big\n",
+ dt_node_name(node));
+ rc = -ENOENT;
+ goto cleanup;
+ }
+
+ /* Find number of HART index bits */
+ if ( !dt_property_read_u32(node, "riscv,hart-index-bits",
+ &imsic_cfg.hart_index_bits) )
+ {
+ /* Assume default value */
+ imsic_cfg.hart_index_bits = fls(*nr_parent_irqs);
+ if ( BIT(imsic_cfg.hart_index_bits, UL) < *nr_parent_irqs )
+ imsic_cfg.hart_index_bits++;
Since fls() returns a 1-based bit number, isn't it rather that in the
exact-power-of-2 case you'd need to subtract 1?
Agree, in this case, -1 should be taken into account.
+ }
+ tmp -= imsic_cfg.guest_index_bits;
+ if ( tmp < imsic_cfg.hart_index_bits )
+ {
+ printk(XENLOG_ERR "%s: HART index bits too big\n",
+ dt_node_name(node));
+ rc = -ENOENT;
+ goto cleanup;
+ }
+
+ /* Find number of group index bits */
+ if ( !dt_property_read_u32(node, "riscv,group-index-bits",
+ &imsic_cfg.group_index_bits) )
+ imsic_cfg.group_index_bits = 0;
+ tmp -= imsic_cfg.hart_index_bits;
+ if ( tmp < imsic_cfg.group_index_bits )
+ {
+ printk(XENLOG_ERR "%s: group index bits too big\n",
+ dt_node_name(node));
+ rc = -ENOENT;
+ goto cleanup;
+ }
+
+ /* Find first bit position of group index */
+ tmp = IMSIC_MMIO_PAGE_SHIFT * 2;
+ if ( !dt_property_read_u32(node, "riscv,group-index-shift",
+ &imsic_cfg.group_index_shift) )
+ imsic_cfg.group_index_shift = tmp;
+ if ( imsic_cfg.group_index_shift < tmp )
+ {
+ printk(XENLOG_ERR "%s: group index shift too small\n",
+ dt_node_name(node));
+ rc = -ENOENT;
+ goto cleanup;
+ }
+ tmp = imsic_cfg.group_index_bits + imsic_cfg.group_index_shift - 1;
+ if ( tmp >= BITS_PER_LONG )
+ {
+ printk(XENLOG_ERR "%s: group index shift too big\n",
+ dt_node_name(node));
+ rc = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Find number of interrupt identities */
+ if ( !dt_property_read_u32(node, "riscv,num-ids", &imsic_cfg.nr_ids) )
+ {
+ printk(XENLOG_ERR "%s: number of interrupt identities not found\n",
+ node->name);
+ rc = -ENOENT;
+ goto cleanup;
+ }
+
+ if ( (imsic_cfg.nr_ids < IMSIC_MIN_ID) ||
+ (imsic_cfg.nr_ids > IMSIC_MAX_ID) ||
+ ((imsic_cfg.nr_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID) )
Now that you've explained to me what the deal is with these constants: Isn't
the 1st of the three checks redundant with the last one?
Agree, one of them could be dropped.
+/*
+ * Initialize the imsic_cfg structure based on the IMSIC DT node.
+ *
+ * Returns 0 if initialization is successful, a negative value on failure,
+ * or IRQ_M_EXT if the IMSIC node corresponds to a machine-mode IMSIC,
+ * which should be ignored by the hypervisor.
+ */
+int __init imsic_init(const struct dt_device_node *node)
+{
+ int rc;
+ unsigned long reloff, hartid;
+ unsigned int nr_parent_irqs, index, nr_handlers = 0;
+ paddr_t base_addr;
+ unsigned int nr_mmios;
+
+ /* Parse IMSIC node */
+ rc = imsic_parse_node(node, &nr_parent_irqs);
+ /*
+ * If machine mode imsic node => ignore it.
+ * If rc < 0 => parsing of IMSIC DT node failed.
+ */
+ if ( (rc == IRQ_M_EXT) || rc )
+ return rc;
The former of the checks is redundant with the latter. Did you perhaps mean
"rc < 0" for that one?
Yes, like the comment is correct but in code I did a mistake.
+ nr_mmios = imsic_cfg.nr_mmios;
+
+ /* Allocate MMIO resource array */
+ imsic_cfg.mmios = xzalloc_array(struct imsic_mmios, nr_mmios);
How large can this and ...
+ if ( !imsic_cfg.mmios )
+ {
+ rc = -ENOMEM;
+ goto imsic_init_err;
+ }
+
+ imsic_cfg.msi = xzalloc_array(struct imsic_msi, nr_parent_irqs);
... this array grow (in principle)?
Roughly speaking, this is the number of processors. The highests amount of
processors
on the market I saw it was 32. But it was over a year ago when I last checked
this.
I think you're aware that in principle
new code is expected to use xvmalloc() and friends unless there are specific
reasons speaking against that.
Oh, missed 'v'...
+ if ( !imsic_cfg.msi )
+ {
+ rc = -ENOMEM;
+ goto imsic_init_err;
+ }
+
+ /* Check MMIO register sets */
+ for ( unsigned int i = 0; i < nr_mmios; i++ )
+ {
+ if ( !alloc_cpumask_var(&imsic_cfg.mmios[i].cpus) )
+ {
+ rc = -ENOMEM;
+ goto imsic_init_err;
+ }
+
+ rc = dt_device_get_address(node, i, &imsic_cfg.mmios[i].base_addr,
+ &imsic_cfg.mmios[i].size);
+ if ( rc )
+ {
+ printk(XENLOG_ERR "%s: unable to parse MMIO regset %u\n",
+ node->name, i);
+ goto imsic_init_err;
+ }
+
+ base_addr = imsic_cfg.mmios[i].base_addr;
+ base_addr &= ~(BIT(imsic_cfg.guest_index_bits +
+ imsic_cfg.hart_index_bits +
+ IMSIC_MMIO_PAGE_SHIFT, UL) - 1);
+ base_addr &= ~((BIT(imsic_cfg.group_index_bits, UL) - 1) <<
+ imsic_cfg.group_index_shift);
+ if ( base_addr != imsic_cfg.base_addr )
+ {
+ rc = -EINVAL;
+ printk(XENLOG_ERR "%s: address mismatch for regset %u\n",
+ node->name, i);
+ goto imsic_init_err;
+ }
Maybe just for my own understanding: There's no similar check for the
sizes to match / be consistent wanted / needed?
If you are speaking about imsic_cfg.mmios[i].size then it depends fully on h/w
will
provide, IMO.
So I don't what is possible range for imsic_cfg.mmios[i].size.
+ }
+
+ /* Configure handlers for target CPUs */
+ for ( unsigned int i = 0; i < nr_parent_irqs; i++ )
+ {
+ unsigned long xen_cpuid;
+
+ rc = imsic_get_parent_hartid(node, i, &hartid);
+ if ( rc )
+ {
+ printk(XENLOG_WARNING "%s: cpu ID for parent irq%u not found\n",
+ node->name, i);
+ continue;
+ }
+
+ xen_cpuid = hartid_to_cpuid(hartid);
I'm probably biased by "cpuid" having different meaning on x86, but: To
be consistent with variable names elsewhere, couldn't this variable simply
be named "cpu"? With the other item named "hartid" there's no ambiguity
there anymore.
Sure, I will use "cpu"/"xen_cpu" for instead of xen_cpuid.
+ if ( xen_cpuid >= num_possible_cpus() )
+ {
+ printk(XENLOG_WARNING "%s: unsupported cpu ID=%lu for parent
irq%u\n",
+ node->name, hartid, i);
The message continues to be ambiguous (to me as a non-RISC-V person at
least): You log a hart ID, while you say "cpu ID". Also, as I think I
said elsewhere already, the hart ID may better be logged using %#lx.
I will correct the message.
+ }
+
+ if ( index == nr_mmios )
+ {
+ printk(XENLOG_WARNING "%s: MMIO not found for parent irq%u\n",
+ node->name, i);
+ continue;
+ }
+
+ if ( !IS_ALIGNED(imsic_cfg.msi[xen_cpuid].base_addr + reloff,
PAGE_SIZE) )
If this is the crucial thing to check, ...
+ {
+ printk(XENLOG_WARNING "%s: MMIO address %#lx is not aligned on a
page\n",
+ node->name, imsic_cfg.msi[xen_cpuid].base_addr + reloff);
+ imsic_cfg.msi[xen_cpuid].offset = 0;
+ imsic_cfg.msi[xen_cpuid].base_addr = 0;
+ continue;
+ }
+
+ cpumask_set_cpu(xen_cpuid, imsic_cfg.mmios[index].cpus);
+
+ imsic_cfg.msi[xen_cpuid].base_addr = imsic_cfg.mmios[index].base_addr;
+ imsic_cfg.msi[xen_cpuid].offset = reloff;
... why is it that the two parts are stored separately? If their sum needs to
be page-aligned, I'd kind of expect it's only ever the sum which is used?
Because in APLIC's target register it is used only base_addr and which one
interrupt
file to use is chosen by hart/guest index:
static void vaplic_update_target(const struct imsic_config *imsic,
int guest_id, unsigned long hart_id,
uint32_t *value)
{
unsigned long group_index;
uint32_t hhxw = imsic->group_index_bits;
uint32_t lhxw = imsic->hart_index_bits;
uint32_t hhxs = imsic->group_index_shift - IMSIC_MMIO_PAGE_SHIFT * 2;
unsigned long base_ppn = imsic->msi[hart_id].base_addr >>
IMSIC_MMIO_PAGE_SHIFT;
group_index = (base_ppn >> (hhxs + 12)) & (BIT(hhxw, UL) - 1);
*value &= APLIC_TARGET_EIID_MASK;
*value |= guest_id << APLIC_TARGET_GUEST_IDX_SHIFT;
*value |= hart_id << APLIC_TARGET_HART_IDX_SHIFT;
*value |= group_index << (lhxw + APLIC_TARGET_HART_IDX_SHIFT) ;
}
Also is it really PAGE_SHIFT or rather IMSIC_MMIO_PAGE_SHIFT that needs
chacking against?
I think more correct will be IMSIC_MMIO_PAGE_SHIFT.
Further please pay attention to line length limits - there are at least two
violations around my earlier comment here.
--- /dev/null
+++ b/xen/arch/riscv/include/asm/imsic.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * xen/arch/riscv/include/asm/imsic.h
+ *
+ * RISC-V Incoming MSI Controller support
+ *
+ * (c) Microchip Technology Inc.
+ */
+
+#ifndef ASM__RISCV__IMSIC_H
+#define ASM__RISCV__IMSIC_H
Please update according to the most recent naming rules change (all it takes
may be to shrink the double underscores).
+#include <xen/types.h>
+
+#define IMSIC_MMIO_PAGE_SHIFT 12
+#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT)
+
+#define IMSIC_MIN_ID 63
+#define IMSIC_MAX_ID 2047
+
+struct imsic_msi {
+ paddr_t base_addr;
+ unsigned long offset;
+};
+
+struct imsic_mmios {
+ paddr_t base_addr;
+ unsigned long size;
+ cpumask_var_t cpus;
+};
+
+struct imsic_config {
+ /* Base address */
+ paddr_t base_addr;
+
+ /* Bits representing Guest index, HART index, and Group index */
+ unsigned int guest_index_bits;
+ unsigned int hart_index_bits;
+ unsigned int group_index_bits;
+ unsigned int group_index_shift;
+
+ /* IMSIC phandle */
+ unsigned int phandle;
+
+ /* Number of parent irq */
+ unsigned int nr_parent_irqs;
+
+ /* Number off interrupt identities */
+ unsigned int nr_ids;
+
+ /* MMIOs */
+ unsigned int nr_mmios;
+ struct imsic_mmios *mmios;
Are the contents of this and ...
+ /* MSI */
+ struct imsic_msi *msi;
... this array ever changing post-init? If not, the pointers here may want
to be pointer-to-const (requiring local variables in the function populating
the field).
No, they will be initialized once.
Even more I think we can drop *mmios and nr_mmios from this struct as it is
used only
in imsic_init(), so could be allocated and freed there.
Only *msi is used in the function (vaplic_update_target) mentioned above.
@@ -18,6 +19,18 @@ static inline unsigned long cpuid_to_hartid(unsigned long
cpuid)
return pcpu_info[cpuid].hart_id;
}
+static inline unsigned long hartid_to_cpuid(unsigned long hartid)
+{
+ for ( unsigned int cpuid = 0; cpuid < ARRAY_SIZE(pcpu_info); cpuid++ )
+ {
+ if ( hartid == cpuid_to_hartid(cpuid) )
+ return cpuid;
+ }
+
+ /* hartid isn't valid for some reason */
+ return NR_CPUS;
+}
Considering the values being returned, why's the function's return type
"unsigned long"?
mhartid register has MXLEN length, so theoretically we could have from 0 to
MXLEN-1
Harts and so we could have the same amount of Xen's CPUIDs. and MXLEN is 32 for
RV32
and MXLEN is 64 for RV64.
Why the use of ARRAY_SIZE() in the loop header? You don't use pcpu_info[]
in the loop body.
I will chang to NR_CPUs. I thought that it would be more generic if pcpu_info
will
be initialized with something else, not NR_CPUs, but I don't rembember why I
think
it would be better.
Finally - on big systems this is going to be pretty inefficient a lookup.
This may want to at least have a TODO comment attached.
Sure, I will add.
Thanks.
~ Oleksii