Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-21 Thread Daniel Henrique Barboza




On 5/21/24 07:52, Frank Chang wrote:

Hi Daniel,

On Tue, May 21, 2024 at 12:17 AM Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com>> wrote:

Hi Frank,

On 5/16/24 04:13, Frank Chang wrote:
 > On Mon, May 13, 2024 at 8:37 PM Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com> >> wrote:
 >
 >     Hi Frank,
 >
 >
 >     On 5/8/24 08:15, Daniel Henrique Barboza wrote:
 >      > Hi Frank,
 >      >
 >      > I'll reply with that I've done so far. Still missing some stuff:
 >      >
 >      > On 5/2/24 08:37, Frank Chang wrote:
 >      >> Hi Daniel,
 >      >>
 >      >> Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com> >> 於 2024年3月8日 週五 上午12:04寫道:
 >      >>>
 >      >>> From: Tomasz Jeznach mailto:tjezn...@rivosinc.com> 
>>
 >      >>>
 >      >>> The RISC-V IOMMU specification is now ratified as-per the RISC-V
 >      >>> international process. The latest frozen specifcation can be 
found
 >      >>> at:
 >      >>>
 >      >>> 
https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf 
 
>
 >      >>>
 >      >>> Add the foundation of the device emulation for RISC-V IOMMU, 
which
 >      >>> includes an IOMMU that has no capabilities but MSI interrupt 
support and
 >      >>> fault queue interfaces. We'll add add more features 
incrementally in the
 >      >>> next patches.
 >      >>>
 >      >>> Co-developed-by: Sebastien Boeuf mailto:s...@rivosinc.com> 
>>
 >      >>> Signed-off-by: Sebastien Boeuf mailto:s...@rivosinc.com> 
>>
 >      >>> Signed-off-by: Tomasz Jeznach mailto:tjezn...@rivosinc.com> >>
 >      >>> Signed-off-by: Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com> >>
 >      >>> ---
 >      >>>   hw/riscv/Kconfig |    4 +
 >
 >     (...)
 >
 >      >>> +
 >      >>> +    s->iommus.le_next = NULL;
 >      >>> +    s->iommus.le_prev = NULL;
 >      >>> +    QLIST_INIT(>spaces);
 >      >>> +    qemu_cond_init(>core_cond);
 >      >>> +    qemu_mutex_init(>core_lock);
 >      >>> +    qemu_spin_init(>regs_lock);
 >      >>> +    qemu_thread_create(>core_proc, "riscv-iommu-core",
 >      >>> +    riscv_iommu_core_proc, s, QEMU_THREAD_JOINABLE);
 >      >>
 >      >> In our experience, using QEMU thread increases the latency of 
command
 >      >> queue processing,
 >      >> which leads to the potential IOMMU fence timeout in the Linux 
driver
 >      >> when using IOMMU with KVM,
 >      >> e.g. booting the guest Linux.
 >      >>
 >      >> Is it possible to remove the thread from the IOMMU just like 
ARM, AMD,
 >      >> and Intel IOMMU models?
 >      >
 >      > Interesting. We've been using this emulation internally in 
Ventana, with
 >      > KVM and VFIO, and didn't experience this issue. Drew is on CC and 
can talk
 >      > more about it.
 >      >
 >      > That said, I don't mind this change, assuming it's feasible to 
make it for this
 >      > first version.  I'll need to check it how other IOMMUs are doing 
it.
 >
 >
 >     I removed the threading and it seems to be working fine without it. 
I'll commit this
 >     change for v3.
 >
 >      >
 >      >
 >      >
 >      >>
 >      >>> +}
 >      >>> +
 >      >
 >      > (...)
 >      >
 >      >>> +
 >      >>> +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void 
*opaque, int devfn)
 >      >>> +{
 >      >>> +    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
 >      >>> +    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), 
devfn);
 >      >>> +    AddressSpace *as = NULL;
 >      >>> +
 >      >>> +    if (pdev && pci_is_iommu(pdev)) {
 >      >>> +    return s->target_as;
 >      >>> +    }
 >      >>> +
 >      >>> +    /* Find first registered IOMMU device */
 >      >>> +    while (s->iommus.le_prev) {
 >      >>> +    s = *(s->iommus.le_prev);
 >      >>> +    }
 >      >>> +
 >      >>> +    /* Find first matching IOMMU */
 >      >>> +    while (s != NULL && as == NULL) {
 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-21 Thread Frank Chang
Hi Daniel,

On Tue, May 21, 2024 at 12:17 AM Daniel Henrique Barboza <
dbarb...@ventanamicro.com> wrote:

> Hi Frank,
>
> On 5/16/24 04:13, Frank Chang wrote:
> > On Mon, May 13, 2024 at 8:37 PM Daniel Henrique Barboza <
> dbarb...@ventanamicro.com > wrote:
> >
> > Hi Frank,
> >
> >
> > On 5/8/24 08:15, Daniel Henrique Barboza wrote:
> >  > Hi Frank,
> >  >
> >  > I'll reply with that I've done so far. Still missing some stuff:
> >  >
> >  > On 5/2/24 08:37, Frank Chang wrote:
> >  >> Hi Daniel,
> >  >>
> >  >> Daniel Henrique Barboza  dbarb...@ventanamicro.com>> 於 2024年3月8日 週五 上午12:04寫道:
> >  >>>
> >  >>> From: Tomasz Jeznach  tjezn...@rivosinc.com>>
> >  >>>
> >  >>> The RISC-V IOMMU specification is now ratified as-per the RISC-V
> >  >>> international process. The latest frozen specifcation can be
> found
> >  >>> at:
> >  >>>
> >  >>>
> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf
> <
> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf
> >
> >  >>>
> >  >>> Add the foundation of the device emulation for RISC-V IOMMU,
> which
> >  >>> includes an IOMMU that has no capabilities but MSI interrupt
> support and
> >  >>> fault queue interfaces. We'll add add more features
> incrementally in the
> >  >>> next patches.
> >  >>>
> >  >>> Co-developed-by: Sebastien Boeuf  s...@rivosinc.com>>
> >  >>> Signed-off-by: Sebastien Boeuf  s...@rivosinc.com>>
> >  >>> Signed-off-by: Tomasz Jeznach  tjezn...@rivosinc.com>>
> >  >>> Signed-off-by: Daniel Henrique Barboza <
> dbarb...@ventanamicro.com >
> >  >>> ---
> >  >>>   hw/riscv/Kconfig |4 +
> >
> > (...)
> >
> >  >>> +
> >  >>> +s->iommus.le_next = NULL;
> >  >>> +s->iommus.le_prev = NULL;
> >  >>> +QLIST_INIT(>spaces);
> >  >>> +qemu_cond_init(>core_cond);
> >  >>> +qemu_mutex_init(>core_lock);
> >  >>> +qemu_spin_init(>regs_lock);
> >  >>> +qemu_thread_create(>core_proc, "riscv-iommu-core",
> >  >>> +riscv_iommu_core_proc, s, QEMU_THREAD_JOINABLE);
> >  >>
> >  >> In our experience, using QEMU thread increases the latency of
> command
> >  >> queue processing,
> >  >> which leads to the potential IOMMU fence timeout in the Linux
> driver
> >  >> when using IOMMU with KVM,
> >  >> e.g. booting the guest Linux.
> >  >>
> >  >> Is it possible to remove the thread from the IOMMU just like
> ARM, AMD,
> >  >> and Intel IOMMU models?
> >  >
> >  > Interesting. We've been using this emulation internally in
> Ventana, with
> >  > KVM and VFIO, and didn't experience this issue. Drew is on CC and
> can talk
> >  > more about it.
> >  >
> >  > That said, I don't mind this change, assuming it's feasible to
> make it for this
> >  > first version.  I'll need to check it how other IOMMUs are doing
> it.
> >
> >
> > I removed the threading and it seems to be working fine without it.
> I'll commit this
> > change for v3.
> >
> >  >
> >  >
> >  >
> >  >>
> >  >>> +}
> >  >>> +
> >  >
> >  > (...)
> >  >
> >  >>> +
> >  >>> +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void
> *opaque, int devfn)
> >  >>> +{
> >  >>> +RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
> >  >>> +PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus),
> devfn);
> >  >>> +AddressSpace *as = NULL;
> >  >>> +
> >  >>> +if (pdev && pci_is_iommu(pdev)) {
> >  >>> +return s->target_as;
> >  >>> +}
> >  >>> +
> >  >>> +/* Find first registered IOMMU device */
> >  >>> +while (s->iommus.le_prev) {
> >  >>> +s = *(s->iommus.le_prev);
> >  >>> +}
> >  >>> +
> >  >>> +/* Find first matching IOMMU */
> >  >>> +while (s != NULL && as == NULL) {
> >  >>> +as = riscv_iommu_space(s,
> PCI_BUILD_BDF(pci_bus_num(bus), devfn));
> >  >>
> >  >> For pci_bus_num(),
> >  >> riscv_iommu_find_as() can be called at the very early stage
> >  >> where software has no chance to enumerate the bus numbers.
> >
> > I investigated and this doesn't seem to be a problem. This function
> is called at the
> > last step of the realize() steps of both riscv_iommu_pci_realize()
> and
> > riscv_iommu_sys_realize(), and by that time the pci_bus_num() is
> already assigned.
> > Other iommus use pci_bus_num() into their own get_address_space()
> callbacks like
> > this too.
> >
> >
> > Hi Daniel,
> >
> > IIUC, pci_bus_num() by default is assigned to pcibus_num():
> >
> > static int pcibus_num(PCIBus *bus)
> > {
> >  if (pci_bus_is_root(bus)) {
> >  return 0; /* pci host bridge */
> >  }
> >  

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-20 Thread Daniel Henrique Barboza

Hi Frank,

On 5/16/24 04:13, Frank Chang wrote:

On Mon, May 13, 2024 at 8:37 PM Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com>> wrote:

Hi Frank,


On 5/8/24 08:15, Daniel Henrique Barboza wrote:
 > Hi Frank,
 >
 > I'll reply with that I've done so far. Still missing some stuff:
 >
 > On 5/2/24 08:37, Frank Chang wrote:
 >> Hi Daniel,
 >>
 >> Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com>> 於 2024年3月8日 週五 上午12:04寫道:
 >>>
 >>> From: Tomasz Jeznach mailto:tjezn...@rivosinc.com>>
 >>>
 >>> The RISC-V IOMMU specification is now ratified as-per the RISC-V
 >>> international process. The latest frozen specifcation can be found
 >>> at:
 >>>
 >>> 
https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf 

 >>>
 >>> Add the foundation of the device emulation for RISC-V IOMMU, which
 >>> includes an IOMMU that has no capabilities but MSI interrupt support 
and
 >>> fault queue interfaces. We'll add add more features incrementally in 
the
 >>> next patches.
 >>>
 >>> Co-developed-by: Sebastien Boeuf mailto:s...@rivosinc.com>>
 >>> Signed-off-by: Sebastien Boeuf mailto:s...@rivosinc.com>>
 >>> Signed-off-by: Tomasz Jeznach mailto:tjezn...@rivosinc.com>>
 >>> Signed-off-by: Daniel Henrique Barboza mailto:dbarb...@ventanamicro.com>>
 >>> ---
 >>>   hw/riscv/Kconfig |    4 +

(...)

 >>> +
 >>> +    s->iommus.le_next = NULL;
 >>> +    s->iommus.le_prev = NULL;
 >>> +    QLIST_INIT(>spaces);
 >>> +    qemu_cond_init(>core_cond);
 >>> +    qemu_mutex_init(>core_lock);
 >>> +    qemu_spin_init(>regs_lock);
 >>> +    qemu_thread_create(>core_proc, "riscv-iommu-core",
 >>> +    riscv_iommu_core_proc, s, QEMU_THREAD_JOINABLE);
 >>
 >> In our experience, using QEMU thread increases the latency of command
 >> queue processing,
 >> which leads to the potential IOMMU fence timeout in the Linux driver
 >> when using IOMMU with KVM,
 >> e.g. booting the guest Linux.
 >>
 >> Is it possible to remove the thread from the IOMMU just like ARM, AMD,
 >> and Intel IOMMU models?
 >
 > Interesting. We've been using this emulation internally in Ventana, with
 > KVM and VFIO, and didn't experience this issue. Drew is on CC and can 
talk
 > more about it.
 >
 > That said, I don't mind this change, assuming it's feasible to make it 
for this
 > first version.  I'll need to check it how other IOMMUs are doing it.


I removed the threading and it seems to be working fine without it. I'll 
commit this
change for v3.

 >
 >
 >
 >>
 >>> +}
 >>> +
 >
 > (...)
 >
 >>> +
 >>> +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, 
int devfn)
 >>> +{
 >>> +    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
 >>> +    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
 >>> +    AddressSpace *as = NULL;
 >>> +
 >>> +    if (pdev && pci_is_iommu(pdev)) {
 >>> +    return s->target_as;
 >>> +    }
 >>> +
 >>> +    /* Find first registered IOMMU device */
 >>> +    while (s->iommus.le_prev) {
 >>> +    s = *(s->iommus.le_prev);
 >>> +    }
 >>> +
 >>> +    /* Find first matching IOMMU */
 >>> +    while (s != NULL && as == NULL) {
 >>> +    as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), 
devfn));
 >>
 >> For pci_bus_num(),
 >> riscv_iommu_find_as() can be called at the very early stage
 >> where software has no chance to enumerate the bus numbers.

I investigated and this doesn't seem to be a problem. This function is 
called at the
last step of the realize() steps of both riscv_iommu_pci_realize() and
riscv_iommu_sys_realize(), and by that time the pci_bus_num() is already 
assigned.
Other iommus use pci_bus_num() into their own get_address_space() callbacks 
like
this too.


Hi Daniel,

IIUC, pci_bus_num() by default is assigned to pcibus_num():

static int pcibus_num(PCIBus *bus)
{
     if (pci_bus_is_root(bus)) {
         return 0; /* pci host bridge */
     }
     return bus->parent_dev->config[PCI_SECONDARY_BUS];
}

If the bus is not the root bus, it tries to read the bus' parent device's
secondary bus number (PCI_SECONDARY_BUS) field in the PCI configuration space.
This field should be programmable by the SW during PCIe enumeration.
But I don't think SW has a chance to be executed before 
riscv_iommu_sys_realize() is called,
since it's pretty early before CPU's execution unless RISC-V IOMMU is 
hot-plugged.
Even if RISC-V IOMMU is hot-plugged, I think riscv_iommu_sys_realize() is still 
called
before SW aware of the existence of IOMMU on the PCI topology tree.

Do you think this 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-16 Thread Frank Chang
On Mon, May 13, 2024 at 8:37 PM Daniel Henrique Barboza <
dbarb...@ventanamicro.com> wrote:

> Hi Frank,
>
>
> On 5/8/24 08:15, Daniel Henrique Barboza wrote:
> > Hi Frank,
> >
> > I'll reply with that I've done so far. Still missing some stuff:
> >
> > On 5/2/24 08:37, Frank Chang wrote:
> >> Hi Daniel,
> >>
> >> Daniel Henrique Barboza  於 2024年3月8日 週五
> 上午12:04寫道:
> >>>
> >>> From: Tomasz Jeznach 
> >>>
> >>> The RISC-V IOMMU specification is now ratified as-per the RISC-V
> >>> international process. The latest frozen specifcation can be found
> >>> at:
> >>>
> >>>
> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf
> >>>
> >>> Add the foundation of the device emulation for RISC-V IOMMU, which
> >>> includes an IOMMU that has no capabilities but MSI interrupt support
> and
> >>> fault queue interfaces. We'll add add more features incrementally in
> the
> >>> next patches.
> >>>
> >>> Co-developed-by: Sebastien Boeuf 
> >>> Signed-off-by: Sebastien Boeuf 
> >>> Signed-off-by: Tomasz Jeznach 
> >>> Signed-off-by: Daniel Henrique Barboza 
> >>> ---
> >>>   hw/riscv/Kconfig |4 +
>
> (...)
>
> >>> +
> >>> +s->iommus.le_next = NULL;
> >>> +s->iommus.le_prev = NULL;
> >>> +QLIST_INIT(>spaces);
> >>> +qemu_cond_init(>core_cond);
> >>> +qemu_mutex_init(>core_lock);
> >>> +qemu_spin_init(>regs_lock);
> >>> +qemu_thread_create(>core_proc, "riscv-iommu-core",
> >>> +riscv_iommu_core_proc, s, QEMU_THREAD_JOINABLE);
> >>
> >> In our experience, using QEMU thread increases the latency of command
> >> queue processing,
> >> which leads to the potential IOMMU fence timeout in the Linux driver
> >> when using IOMMU with KVM,
> >> e.g. booting the guest Linux.
> >>
> >> Is it possible to remove the thread from the IOMMU just like ARM, AMD,
> >> and Intel IOMMU models?
> >
> > Interesting. We've been using this emulation internally in Ventana, with
> > KVM and VFIO, and didn't experience this issue. Drew is on CC and can
> talk
> > more about it.
> >
> > That said, I don't mind this change, assuming it's feasible to make it
> for this
> > first version.  I'll need to check it how other IOMMUs are doing it.
>
>
> I removed the threading and it seems to be working fine without it. I'll
> commit this
> change for v3.
>
> >
> >
> >
> >>
> >>> +}
> >>> +
> >
> > (...)
> >
> >>> +
> >>> +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque,
> int devfn)
> >>> +{
> >>> +RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
> >>> +PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
> >>> +AddressSpace *as = NULL;
> >>> +
> >>> +if (pdev && pci_is_iommu(pdev)) {
> >>> +return s->target_as;
> >>> +}
> >>> +
> >>> +/* Find first registered IOMMU device */
> >>> +while (s->iommus.le_prev) {
> >>> +s = *(s->iommus.le_prev);
> >>> +}
> >>> +
> >>> +/* Find first matching IOMMU */
> >>> +while (s != NULL && as == NULL) {
> >>> +as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus),
> devfn));
> >>
> >> For pci_bus_num(),
> >> riscv_iommu_find_as() can be called at the very early stage
> >> where software has no chance to enumerate the bus numbers.
>
> I investigated and this doesn't seem to be a problem. This function is
> called at the
> last step of the realize() steps of both riscv_iommu_pci_realize() and
> riscv_iommu_sys_realize(), and by that time the pci_bus_num() is already
> assigned.
> Other iommus use pci_bus_num() into their own get_address_space()
> callbacks like
> this too.
>

Hi Daniel,

IIUC, pci_bus_num() by default is assigned to pcibus_num():

static int pcibus_num(PCIBus *bus)
{
if (pci_bus_is_root(bus)) {
return 0; /* pci host bridge */
}
return bus->parent_dev->config[PCI_SECONDARY_BUS];
}

If the bus is not the root bus, it tries to read the bus' parent device's
secondary bus number (PCI_SECONDARY_BUS) field in the PCI configuration
space.
This field should be programmable by the SW during PCIe enumeration.
But I don't think SW has a chance to be executed before
riscv_iommu_sys_realize() is called,
since it's pretty early before CPU's execution unless RISC-V IOMMU is
hot-plugged.
Even if RISC-V IOMMU is hot-plugged, I think riscv_iommu_sys_realize() is
still called
before SW aware of the existence of IOMMU on the PCI topology tree.

Do you think this matches your observation?

Regards,
Frank Chang


>
>
> Thanks,
>
>
> Daniel
>
>
> >
> > I'll see how other IOMMUs are handling their iommu_find_as()
> >
> >
> > Thanks,
> >
> >
> > Daniel
> >
> >
> >>
> >>
> >>
> >>
> >>> +s = s->iommus.le_next;
> >>> +}
> >>> +
> >>> +return as ? as : _space_memory;
> >>> +}
> >>> +
> >>> +static const PCIIOMMUOps riscv_iommu_ops = {
> >>> +.get_address_space = riscv_iommu_find_as,
> >>> +};
> >>> +
> >>> +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
> >>> +Error **errp)
> >>> +{
> >>> +

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-14 Thread Daniel Henrique Barboza

Hi Jason,

On 5/1/24 08:57, Jason Chien wrote:

Daniel Henrique Barboza 於 2024/3/8 上午 12:03 寫道:

From: Tomasz Jeznach

The RISC-V IOMMU specification is now ratified as-per the RISC-V
international process. The latest frozen specifcation can be found
at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf
Signed-off-by: Sebastien Boeuf
Signed-off-by: Tomasz Jeznach
Signed-off-by: Daniel Henrique Barboza
---
  hw/riscv/Kconfig |4 +
  hw/riscv/meson.build |1 +
  hw/riscv/riscv-iommu.c   | 1492 ++
  hw/riscv/riscv-iommu.h   |  141 
  hw/riscv/trace-events|   11 +
  hw/riscv/trace.h |2 +
  include/hw/riscv/iommu.h |   36 +
  meson.build  |1 +
  8 files changed, 1688 insertions(+)
  create mode 100644 hw/riscv/riscv-iommu.c
  create mode 100644 hw/riscv/riscv-iommu.h
  create mode 100644 hw/riscv/trace-events
  create mode 100644 hw/riscv/trace.h
  create mode 100644 include/hw/riscv/iommu.h

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..faf6a10029 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+bool
+


(...)


+
+/* IOMMU index for transactions without PASID specified. */
+#define RISCV_IOMMU_NOPASID 0
+
+static void riscv_iommu_notify(RISCVIOMMUState *s, int vec)
+{
+const uint32_t ipsr =
+riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec), 0);
+const uint32_t ivec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IVEC);
+if (s->notify && !(ipsr & (1 << vec))) {
+s->notify(s, (ivec >> (vec * 4)) & 0x0F);
+}
+}

The RISC-V IOMMU also supports WSI.

+


I mentioned in the review with Frank that this impl does not support WSI, but
it really seems clearer to do the check here nevertheless. I'll add it.



+static void riscv_iommu_fault(RISCVIOMMUState *s,
+  struct riscv_iommu_fq_record *ev)
+{


(...)


+
+/*
+ * Check supported device id width (in bits).
+ * See IOMMU Specification, Chapter 6. Software guidelines.
+ * - if extended device-context format is used:
+ *   1LVL: 6, 2LVL: 15, 3LVL: 24
+ * - if base device-context format is used:
+ *   1LVL: 7, 2LVL: 16, 3LVL: 24
+ */
+if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2 {
+return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;


The cause should be 260 not 258.

 From the RISC-V IOMMU Architecture Spec v1.0.0 section 2.3:
If the device_id is wider than that supported by the IOMMU mode, as determined by the 
following checks then stop and report "Transaction type disallowed" (cause = 
260).
a. ddtp.iommu_mode is 2LVL and DDI[2] is not 0
b. ddtp.iommu_mode is 1LVL and either DDI[2] is not 0 or DDI[1] is not 0



Changed.


+}
+
+/* Device directory tree walk */
+for (; depth-- > 0; ) {
+/*
+ * Select device id index bits based on device directory tree level
+ * and device context format.
+ * See IOMMU Specification, Chapter 2. Data Structures.
+ * - if extended device-context format is used:
+ *   device index: [23:15][14:6][5:0]
+ * - if base device-context format is used:
+ *   device index: [23:16][15:7][6:0]
+ */
+const int split = depth * 9 + 6 + dc_fmt;
+addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
+if (dma_memory_read(s->target_as, addr, , sizeof(de),
+MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+}
+le64_to_cpus();
+if (!(de & RISCV_IOMMU_DDTE_VALID)) {
+/* invalid directory entry */
+return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+}
+if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
+/* reserved bits set */
+return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;


The cause should be 259 not 258.

 From RISC-V IOMMU Architecture Spec v1.0.0 section 2.3.1:
If any bits or encoding that are reserved for future standard use are set within ddte, 
stop and report "DDT entry misconfigured" (cause = 259).


Changed




+}
+addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
+}
+
+/* index into device context entry page */
+addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
+
+memset(, 0, sizeof(dc));
+if (dma_memory_read(s->target_as, addr, , dc_len,
+MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+}
+
+/* Set translation context. */
+ctx->tc = le64_to_cpu(dc.tc);
+ctx->ta = le64_to_cpu(dc.ta);
+

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-13 Thread Daniel Henrique Barboza

Hi Frank,

On 5/10/24 07:58, Frank Chang wrote:

Hi Daniel,

Daniel Henrique Barboza  於 2024年5月8日 週三 下午7:16寫道:


Hi Frank,

I'll reply with that I've done so far. Still missing some stuff:

On 5/2/24 08:37, Frank Chang wrote:

Hi Daniel,

Daniel Henrique Barboza  於 2024年3月8日 週五 上午12:04寫道:



(...)



In our experience, using QEMU thread increases the latency of command
queue processing,
which leads to the potential IOMMU fence timeout in the Linux driver
when using IOMMU with KVM,
e.g. booting the guest Linux.

Is it possible to remove the thread from the IOMMU just like ARM, AMD,
and Intel IOMMU models?


Interesting. We've been using this emulation internally in Ventana, with
KVM and VFIO, and didn't experience this issue. Drew is on CC and can talk
more about it.


We've developed IOFENCE timeout detection mechanism in our Linux
driver internally
to detect the long-run IOFENCE command on the hardware.

However, we hit the assertion when running on QEMU
and the issue was resolved after we removed the thread from IOMMU model.
However, the assertion didn't happen on our hardware.

Regards,
Frank CHang



I see. Well, one more reason to remove the threading for v3 then. I removed it 
and
it seems to be working as usual in my tests, i.e. no perceptible performance or
behavior impacts. Thanks,


Daniel






That said, I don't mind this change, assuming it's feasible to make it for this
first version.  I'll need to check it how other IOMMUs are doing it.






+}
+


(...)


+
+static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
+{
+RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
+PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+AddressSpace *as = NULL;
+
+if (pdev && pci_is_iommu(pdev)) {
+return s->target_as;
+}
+
+/* Find first registered IOMMU device */
+while (s->iommus.le_prev) {
+s = *(s->iommus.le_prev);
+}
+
+/* Find first matching IOMMU */
+while (s != NULL && as == NULL) {
+as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));


For pci_bus_num(),
riscv_iommu_find_as() can be called at the very early stage
where software has no chance to enumerate the bus numbers.


I'll see how other IOMMUs are handling their iommu_find_as()


Thanks,


Daniel








+s = s->iommus.le_next;
+}
+
+return as ? as : _space_memory;
+}
+
+static const PCIIOMMUOps riscv_iommu_ops = {
+.get_address_space = riscv_iommu_find_as,
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+Error **errp)
+{
+if (bus->iommu_ops &&
+bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
+/* Allow multiple IOMMUs on the same PCIe bus, link known devices */
+RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
+QLIST_INSERT_AFTER(last, iommu, iommus);
+} else if (bus->iommu_ops == NULL) {
+pci_setup_iommu(bus, _iommu_ops, iommu);
+} else {
+error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
+pci_bus_num(bus));
+}
+}
+
+static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
+MemTxAttrs attrs)
+{
+return attrs.unspecified ? RISCV_IOMMU_NOPASID : (int)attrs.pasid;
+}
+
+static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
+{
+RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+return 1 << as->iommu->pasid_bits;
+}
+
+static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
+{
+IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+imrc->translate = riscv_iommu_memory_region_translate;
+imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
+imrc->attrs_to_index = riscv_iommu_memory_region_index;
+imrc->num_indexes = riscv_iommu_memory_region_index_len;
+}
+
+static const TypeInfo riscv_iommu_memory_region_info = {
+.parent = TYPE_IOMMU_MEMORY_REGION,
+.name = TYPE_RISCV_IOMMU_MEMORY_REGION,
+.class_init = riscv_iommu_memory_region_init,
+};
+
+static void riscv_iommu_register_mr_types(void)
+{
+type_register_static(_iommu_memory_region_info);
+type_register_static(_iommu_info);
+}
+
+type_init(riscv_iommu_register_mr_types);
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
new file mode 100644
index 00..6f740de690
--- /dev/null
+++ b/hw/riscv/riscv-iommu.h
@@ -0,0 +1,141 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU (Ziommu)
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-13 Thread Daniel Henrique Barboza

Hi Frank,


On 5/8/24 08:15, Daniel Henrique Barboza wrote:

Hi Frank,

I'll reply with that I've done so far. Still missing some stuff:

On 5/2/24 08:37, Frank Chang wrote:

Hi Daniel,

Daniel Henrique Barboza  於 2024年3月8日 週五 上午12:04寫道:


From: Tomasz Jeznach 

The RISC-V IOMMU specification is now ratified as-per the RISC-V
international process. The latest frozen specifcation can be found
at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf 
Signed-off-by: Sebastien Boeuf 
Signed-off-by: Tomasz Jeznach 
Signed-off-by: Daniel Henrique Barboza 
---
  hw/riscv/Kconfig |    4 +


(...)


+
+    s->iommus.le_next = NULL;
+    s->iommus.le_prev = NULL;
+    QLIST_INIT(>spaces);
+    qemu_cond_init(>core_cond);
+    qemu_mutex_init(>core_lock);
+    qemu_spin_init(>regs_lock);
+    qemu_thread_create(>core_proc, "riscv-iommu-core",
+    riscv_iommu_core_proc, s, QEMU_THREAD_JOINABLE);


In our experience, using QEMU thread increases the latency of command
queue processing,
which leads to the potential IOMMU fence timeout in the Linux driver
when using IOMMU with KVM,
e.g. booting the guest Linux.

Is it possible to remove the thread from the IOMMU just like ARM, AMD,
and Intel IOMMU models?


Interesting. We've been using this emulation internally in Ventana, with
KVM and VFIO, and didn't experience this issue. Drew is on CC and can talk
more about it.

That said, I don't mind this change, assuming it's feasible to make it for this
first version.  I'll need to check it how other IOMMUs are doing it.



I removed the threading and it seems to be working fine without it. I'll commit 
this
change for v3.








+}
+


(...)


+
+static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
+{
+    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
+    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+    AddressSpace *as = NULL;
+
+    if (pdev && pci_is_iommu(pdev)) {
+    return s->target_as;
+    }
+
+    /* Find first registered IOMMU device */
+    while (s->iommus.le_prev) {
+    s = *(s->iommus.le_prev);
+    }
+
+    /* Find first matching IOMMU */
+    while (s != NULL && as == NULL) {
+    as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));


For pci_bus_num(),
riscv_iommu_find_as() can be called at the very early stage
where software has no chance to enumerate the bus numbers.


I investigated and this doesn't seem to be a problem. This function is called 
at the
last step of the realize() steps of both riscv_iommu_pci_realize() and
riscv_iommu_sys_realize(), and by that time the pci_bus_num() is already 
assigned.
Other iommus use pci_bus_num() into their own get_address_space() callbacks like
this too.


Thanks,


Daniel




I'll see how other IOMMUs are handling their iommu_find_as()


Thanks,


Daniel








+    s = s->iommus.le_next;
+    }
+
+    return as ? as : _space_memory;
+}
+
+static const PCIIOMMUOps riscv_iommu_ops = {
+    .get_address_space = riscv_iommu_find_as,
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+    Error **errp)
+{
+    if (bus->iommu_ops &&
+    bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
+    /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
+    RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
+    QLIST_INSERT_AFTER(last, iommu, iommus);
+    } else if (bus->iommu_ops == NULL) {
+    pci_setup_iommu(bus, _iommu_ops, iommu);
+    } else {
+    error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
+    pci_bus_num(bus));
+    }
+}
+
+static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
+    MemTxAttrs attrs)
+{
+    return attrs.unspecified ? RISCV_IOMMU_NOPASID : (int)attrs.pasid;
+}
+
+static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
+{
+    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+    return 1 << as->iommu->pasid_bits;
+}
+
+static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    imrc->translate = riscv_iommu_memory_region_translate;
+    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
+    imrc->attrs_to_index = riscv_iommu_memory_region_index;
+    imrc->num_indexes = riscv_iommu_memory_region_index_len;
+}
+
+static const TypeInfo riscv_iommu_memory_region_info = {
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
+    .class_init = riscv_iommu_memory_region_init,
+};
+
+static void riscv_iommu_register_mr_types(void)
+{
+    

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-10 Thread Frank Chang
Hi Daniel,

Daniel Henrique Barboza  於 2024年5月8日 週三 下午7:16寫道:
>
> Hi Frank,
>
> I'll reply with that I've done so far. Still missing some stuff:
>
> On 5/2/24 08:37, Frank Chang wrote:
> > Hi Daniel,
> >
> > Daniel Henrique Barboza  於 2024年3月8日 週五 
> > 上午12:04寫道:
> >>
> >> From: Tomasz Jeznach 
> >>
> >> The RISC-V IOMMU specification is now ratified as-per the RISC-V
> >> international process. The latest frozen specifcation can be found
> >> at:
> >>
> >> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf
> >>
> >> Add the foundation of the device emulation for RISC-V IOMMU, which
> >> includes an IOMMU that has no capabilities but MSI interrupt support and
> >> fault queue interfaces. We'll add add more features incrementally in the
> >> next patches.
> >>
> >> Co-developed-by: Sebastien Boeuf 
> >> Signed-off-by: Sebastien Boeuf 
> >> Signed-off-by: Tomasz Jeznach 
> >> Signed-off-by: Daniel Henrique Barboza 
> >> ---
> >>   hw/riscv/Kconfig |4 +
> >>   hw/riscv/meson.build |1 +
> >>   hw/riscv/riscv-iommu.c   | 1492 ++
> >>   hw/riscv/riscv-iommu.h   |  141 
> >>   hw/riscv/trace-events|   11 +
> >>   hw/riscv/trace.h |2 +
> >>   include/hw/riscv/iommu.h |   36 +
> >>   meson.build  |1 +
> >>   8 files changed, 1688 insertions(+)
> >>   create mode 100644 hw/riscv/riscv-iommu.c
> >>   create mode 100644 hw/riscv/riscv-iommu.h
> >>   create mode 100644 hw/riscv/trace-events
> >>   create mode 100644 hw/riscv/trace.h
> >>   create mode 100644 include/hw/riscv/iommu.h
> >>
>
> (...)
>
> +{
> >> +const uint32_t ipsr =
> >> +riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec), 0);
> >> +const uint32_t ivec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IVEC);
> >> +if (s->notify && !(ipsr & (1 << vec))) {
> >> +s->notify(s, (ivec >> (vec * 4)) & 0x0F);
> >> +}
> >
> > s->notify is assigned to riscv_iommu_pci_notify() only.
> > There's no way to assert the wire-signaled interrupt.
> >
> > We should also check fctl.WSI before asserting the interrupt.
> >
>
> This implementation does not support wire-signalled interrupts. It supports 
> only
> MSI, i.e. capabililities.IGS is always MSI (0). For this reason the code is 
> also
> not checking for fctl.WSI.
>
>
>
> >> +}
>   (...)
>
> >> +g_hash_table_unref(ctx_cache);
> >> +*ref = NULL;
> >> +
> >> +if (!(ctx->tc & RISCV_IOMMU_DC_TC_DTF)) {
> >
> > riscv_iommu_ctx_fetch() may return:
> > RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED (256)
> > RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT (257)
> > RISCV_IOMMU_FQ_CAUSE_DDT_INVALID (258)
> > RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED (259)
> >
> > These faults are reported even when DTF is set to 1.
> > We should report these faults regardless of DTF setting.
>
>
> I created a "riscv_iommu_report_fault()" helper to centralize all the report 
> fault
> logic. This helper will check for DTF and, if set, we'll check the 'cause' to 
> see if
> we still want the fault to be reported or not. This helper is then used in 
> these 2
> instances where we're creating a fault by hand. It's also used extensively in
> riscv_iommu_msi_write() to handle all the cases you mentioned above where we
> weren't issuing faults.
>
>
> >
> >> +struct riscv_iommu_fq_record ev = { 0 };
> >> +ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, fault);
> >> +ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE,
> >> +RISCV_IOMMU_FQ_TTYPE_UADDR_RD);
> >> +ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, devid);
> >> +ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, pasid);
> >> +ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, !!pasid);
> >> +riscv_iommu_fault(s, );
> >> +}
> >> +
> >> +g_free(ctx);
> >> +return NULL;
> >> +}
> >> +
> >> +static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
> >> +{
> >> +if (ref) {
> >> +g_hash_table_unref((GHashTable *)ref);
> >> +}
> >> +}
> >> +
> >> +/* Find or allocate address space for a given device */
> >> +static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
> >> +{
> >> +RISCVIOMMUSpace *as;
> >> +
> >> +/* FIXME: PCIe bus remapping for attached endpoints. */
> >> +devid |= s->bus << 8;
> >> +
> >> +qemu_mutex_lock(>core_lock);
> >> +QLIST_FOREACH(as, >spaces, list) {
> >> +if (as->devid == devid) {
> >> +break;
> >> +}
> >> +}
> >> +qemu_mutex_unlock(>core_lock);
> >> +
> >> +if (as == NULL) {
> >> +char name[64];
> >> +as = g_new0(RISCVIOMMUSpace, 1);
> >> +
> >> +as->iommu = s;
> >> +as->devid = devid;
> >> +
> >> +snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
> >> +PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), 
> >> PCI_FUNC(as->devid));
> >> +
> >> +/* IOVA address space, untranslated 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-08 Thread Daniel Henrique Barboza

Hi Frank,

I'll reply with that I've done so far. Still missing some stuff:

On 5/2/24 08:37, Frank Chang wrote:

Hi Daniel,

Daniel Henrique Barboza  於 2024年3月8日 週五 上午12:04寫道:


From: Tomasz Jeznach 

The RISC-V IOMMU specification is now ratified as-per the RISC-V
international process. The latest frozen specifcation can be found
at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf 
Signed-off-by: Sebastien Boeuf 
Signed-off-by: Tomasz Jeznach 
Signed-off-by: Daniel Henrique Barboza 
---
  hw/riscv/Kconfig |4 +
  hw/riscv/meson.build |1 +
  hw/riscv/riscv-iommu.c   | 1492 ++
  hw/riscv/riscv-iommu.h   |  141 
  hw/riscv/trace-events|   11 +
  hw/riscv/trace.h |2 +
  include/hw/riscv/iommu.h |   36 +
  meson.build  |1 +
  8 files changed, 1688 insertions(+)
  create mode 100644 hw/riscv/riscv-iommu.c
  create mode 100644 hw/riscv/riscv-iommu.h
  create mode 100644 hw/riscv/trace-events
  create mode 100644 hw/riscv/trace.h
  create mode 100644 include/hw/riscv/iommu.h



(...)

+{

+const uint32_t ipsr =
+riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec), 0);
+const uint32_t ivec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IVEC);
+if (s->notify && !(ipsr & (1 << vec))) {
+s->notify(s, (ivec >> (vec * 4)) & 0x0F);
+}


s->notify is assigned to riscv_iommu_pci_notify() only.
There's no way to assert the wire-signaled interrupt.

We should also check fctl.WSI before asserting the interrupt.



This implementation does not support wire-signalled interrupts. It supports only
MSI, i.e. capabililities.IGS is always MSI (0). For this reason the code is also
not checking for fctl.WSI.




+}

 (...)


+g_hash_table_unref(ctx_cache);
+*ref = NULL;
+
+if (!(ctx->tc & RISCV_IOMMU_DC_TC_DTF)) {


riscv_iommu_ctx_fetch() may return:
RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED (256)
RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT (257)
RISCV_IOMMU_FQ_CAUSE_DDT_INVALID (258)
RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED (259)

These faults are reported even when DTF is set to 1.
We should report these faults regardless of DTF setting.



I created a "riscv_iommu_report_fault()" helper to centralize all the report 
fault
logic. This helper will check for DTF and, if set, we'll check the 'cause' to 
see if
we still want the fault to be reported or not. This helper is then used in 
these 2
instances where we're creating a fault by hand. It's also used extensively in
riscv_iommu_msi_write() to handle all the cases you mentioned above where we
weren't issuing faults.





+struct riscv_iommu_fq_record ev = { 0 };
+ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, fault);
+ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE,
+RISCV_IOMMU_FQ_TTYPE_UADDR_RD);
+ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, devid);
+ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, pasid);
+ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, !!pasid);
+riscv_iommu_fault(s, );
+}
+
+g_free(ctx);
+return NULL;
+}
+
+static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
+{
+if (ref) {
+g_hash_table_unref((GHashTable *)ref);
+}
+}
+
+/* Find or allocate address space for a given device */
+static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
+{
+RISCVIOMMUSpace *as;
+
+/* FIXME: PCIe bus remapping for attached endpoints. */
+devid |= s->bus << 8;
+
+qemu_mutex_lock(>core_lock);
+QLIST_FOREACH(as, >spaces, list) {
+if (as->devid == devid) {
+break;
+}
+}
+qemu_mutex_unlock(>core_lock);
+
+if (as == NULL) {
+char name[64];
+as = g_new0(RISCVIOMMUSpace, 1);
+
+as->iommu = s;
+as->devid = devid;
+
+snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
+PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
+
+/* IOVA address space, untranslated addresses */
+memory_region_init_iommu(>iova_mr, sizeof(as->iova_mr),
+TYPE_RISCV_IOMMU_MEMORY_REGION,
+OBJECT(as), name, UINT64_MAX);
+address_space_init(>iova_as, MEMORY_REGION(>iova_mr),
+TYPE_RISCV_IOMMU_PCI);


Why do we use TYPE_RISCV_IOMMU_PCI as the address space name here?



This is an error. TYPE_RISCV_IOMMU_PCI is the name of the PCI IOMMU device.

Seeing other iommus in QEMU it seems like the name of memory region is a simple
string, e.g. "amd_iommu", and then the name of the address space of the device
is something that includes the device identification.

I'll change this to 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-02 Thread Frank Chang
Hi Daniel,

Daniel Henrique Barboza  於 2024年3月8日 週五 上午12:04寫道:
>
> From: Tomasz Jeznach 
>
> The RISC-V IOMMU specification is now ratified as-per the RISC-V
> international process. The latest frozen specifcation can be found
> at:
>
> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf
>
> Add the foundation of the device emulation for RISC-V IOMMU, which
> includes an IOMMU that has no capabilities but MSI interrupt support and
> fault queue interfaces. We'll add add more features incrementally in the
> next patches.
>
> Co-developed-by: Sebastien Boeuf 
> Signed-off-by: Sebastien Boeuf 
> Signed-off-by: Tomasz Jeznach 
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  hw/riscv/Kconfig |4 +
>  hw/riscv/meson.build |1 +
>  hw/riscv/riscv-iommu.c   | 1492 ++
>  hw/riscv/riscv-iommu.h   |  141 
>  hw/riscv/trace-events|   11 +
>  hw/riscv/trace.h |2 +
>  include/hw/riscv/iommu.h |   36 +
>  meson.build  |1 +
>  8 files changed, 1688 insertions(+)
>  create mode 100644 hw/riscv/riscv-iommu.c
>  create mode 100644 hw/riscv/riscv-iommu.h
>  create mode 100644 hw/riscv/trace-events
>  create mode 100644 hw/riscv/trace.h
>  create mode 100644 include/hw/riscv/iommu.h
>
> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> index 5d644eb7b1..faf6a10029 100644
> --- a/hw/riscv/Kconfig
> +++ b/hw/riscv/Kconfig
> @@ -1,3 +1,6 @@
> +config RISCV_IOMMU
> +bool
> +
>  config RISCV_NUMA
>  bool
>
> @@ -38,6 +41,7 @@ config RISCV_VIRT
>  select SERIAL
>  select RISCV_ACLINT
>  select RISCV_APLIC
> +select RISCV_IOMMU
>  select RISCV_IMSIC
>  select SIFIVE_PLIC
>  select SIFIVE_TEST
> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> index 2f7ee81be3..ba9eebd605 100644
> --- a/hw/riscv/meson.build
> +++ b/hw/riscv/meson.build
> @@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: 
> files('sifive_u.c'))
>  riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
>  riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: 
> files('microchip_pfsoc.c'))
>  riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
> +riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c'))
>
>  hw_arch += {'riscv': riscv_ss}
> diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
> new file mode 100644
> index 00..df534b99b0
> --- /dev/null
> +++ b/hw/riscv/riscv-iommu.c
> @@ -0,0 +1,1492 @@
> +/*
> + * QEMU emulation of an RISC-V IOMMU (Ziommu)
> + *
> + * Copyright (C) 2021-2023, Rivos Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program; if not, see .
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qom/object.h"
> +#include "hw/pci/pci_bus.h"
> +#include "hw/pci/pci_device.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/riscv/riscv_hart.h"
> +#include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qemu/timer.h"
> +
> +#include "cpu_bits.h"
> +#include "riscv-iommu.h"
> +#include "riscv-iommu-bits.h"
> +#include "trace.h"
> +
> +#define LIMIT_CACHE_CTX   (1U << 7)
> +#define LIMIT_CACHE_IOT   (1U << 20)
> +
> +/* Physical page number coversions */
> +#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
> +#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
> +
> +typedef struct RISCVIOMMUContext RISCVIOMMUContext;
> +typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
> +
> +/* Device assigned I/O address space */
> +struct RISCVIOMMUSpace {
> +IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
> +AddressSpace iova_as;   /* IOVA address space for attached device */
> +RISCVIOMMUState *iommu; /* Managing IOMMU device state */
> +uint32_t devid; /* Requester identifier, AKA device_id */
> +bool notifier;  /* IOMMU unmap notifier enabled */
> +QLIST_ENTRY(RISCVIOMMUSpace) list;
> +};
> +
> +/* Device translation context state. */
> +struct RISCVIOMMUContext {
> +uint64_t devid:24;  /* Requester Id, AKA device_id */
> +uint64_t pasid:20;  /* Process Address Space ID */
> +uint64_t __rfu:20;  /* reserved */
> +uint64_t tc;/* Translation Control */
> +uint64_t ta;/* Translation Attributes */
> +uint64_t msi_addr_mask; /* MSI 

Re: [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-05-01 Thread Jason Chien

Daniel Henrique Barboza 於 2024/3/8 上午 12:03 寫道:

From: Tomasz Jeznach

The RISC-V IOMMU specification is now ratified as-per the RISC-V
international process. The latest frozen specifcation can be found
at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf
Signed-off-by: Sebastien Boeuf
Signed-off-by: Tomasz Jeznach
Signed-off-by: Daniel Henrique Barboza
---
  hw/riscv/Kconfig |4 +
  hw/riscv/meson.build |1 +
  hw/riscv/riscv-iommu.c   | 1492 ++
  hw/riscv/riscv-iommu.h   |  141 
  hw/riscv/trace-events|   11 +
  hw/riscv/trace.h |2 +
  include/hw/riscv/iommu.h |   36 +
  meson.build  |1 +
  8 files changed, 1688 insertions(+)
  create mode 100644 hw/riscv/riscv-iommu.c
  create mode 100644 hw/riscv/riscv-iommu.h
  create mode 100644 hw/riscv/trace-events
  create mode 100644 hw/riscv/trace.h
  create mode 100644 include/hw/riscv/iommu.h

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..faf6a10029 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+bool
+
  config RISCV_NUMA
  bool
  
@@ -38,6 +41,7 @@ config RISCV_VIRT

  select SERIAL
  select RISCV_ACLINT
  select RISCV_APLIC
+select RISCV_IOMMU
  select RISCV_IMSIC
  select SIFIVE_PLIC
  select SIFIVE_TEST
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 2f7ee81be3..ba9eebd605 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: 
files('sifive_u.c'))
  riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
  riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: 
files('microchip_pfsoc.c'))
  riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c'))
  
  hw_arch += {'riscv': riscv_ss}

diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
new file mode 100644
index 00..df534b99b0
--- /dev/null
+++ b/hw/riscv/riscv-iommu.c
@@ -0,0 +1,1492 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU (Ziommu)
+ *
+ * Copyright (C) 2021-2023, Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+#define LIMIT_CACHE_CTX   (1U << 7)
+#define LIMIT_CACHE_IOT   (1U << 20)
+
+/* Physical page number coversions */
+#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
+#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
+
+/* Device assigned I/O address space */
+struct RISCVIOMMUSpace {
+IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
+AddressSpace iova_as;   /* IOVA address space for attached device */
+RISCVIOMMUState *iommu; /* Managing IOMMU device state */
+uint32_t devid; /* Requester identifier, AKA device_id */
+bool notifier;  /* IOMMU unmap notifier enabled */
+QLIST_ENTRY(RISCVIOMMUSpace) list;
+};
+
+/* Device translation context state. */
+struct RISCVIOMMUContext {
+uint64_t devid:24;  /* Requester Id, AKA device_id */
+uint64_t pasid:20;  /* Process Address Space ID */
+uint64_t __rfu:20;  /* reserved */
+uint64_t tc;/* Translation Control */
+uint64_t ta;/* Translation Attributes */
+uint64_t msi_addr_mask; /* MSI filtering - address mask */
+uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
+uint64_t msiptp;/* MSI redirection page table pointer */
+};
+
+/* IOMMU index for transactions without PASID specified. */

[PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation

2024-03-07 Thread Daniel Henrique Barboza
From: Tomasz Jeznach 

The RISC-V IOMMU specification is now ratified as-per the RISC-V
international process. The latest frozen specifcation can be found
at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf 
Signed-off-by: Sebastien Boeuf 
Signed-off-by: Tomasz Jeznach 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/Kconfig |4 +
 hw/riscv/meson.build |1 +
 hw/riscv/riscv-iommu.c   | 1492 ++
 hw/riscv/riscv-iommu.h   |  141 
 hw/riscv/trace-events|   11 +
 hw/riscv/trace.h |2 +
 include/hw/riscv/iommu.h |   36 +
 meson.build  |1 +
 8 files changed, 1688 insertions(+)
 create mode 100644 hw/riscv/riscv-iommu.c
 create mode 100644 hw/riscv/riscv-iommu.h
 create mode 100644 hw/riscv/trace-events
 create mode 100644 hw/riscv/trace.h
 create mode 100644 include/hw/riscv/iommu.h

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..faf6a10029 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+bool
+
 config RISCV_NUMA
 bool
 
@@ -38,6 +41,7 @@ config RISCV_VIRT
 select SERIAL
 select RISCV_ACLINT
 select RISCV_APLIC
+select RISCV_IOMMU
 select RISCV_IMSIC
 select SIFIVE_PLIC
 select SIFIVE_TEST
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 2f7ee81be3..ba9eebd605 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: 
files('sifive_u.c'))
 riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
 riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: 
files('microchip_pfsoc.c'))
 riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c'))
 
 hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
new file mode 100644
index 00..df534b99b0
--- /dev/null
+++ b/hw/riscv/riscv-iommu.c
@@ -0,0 +1,1492 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU (Ziommu)
+ *
+ * Copyright (C) 2021-2023, Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+#define LIMIT_CACHE_CTX   (1U << 7)
+#define LIMIT_CACHE_IOT   (1U << 20)
+
+/* Physical page number coversions */
+#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
+#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
+
+/* Device assigned I/O address space */
+struct RISCVIOMMUSpace {
+IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
+AddressSpace iova_as;   /* IOVA address space for attached device */
+RISCVIOMMUState *iommu; /* Managing IOMMU device state */
+uint32_t devid; /* Requester identifier, AKA device_id */
+bool notifier;  /* IOMMU unmap notifier enabled */
+QLIST_ENTRY(RISCVIOMMUSpace) list;
+};
+
+/* Device translation context state. */
+struct RISCVIOMMUContext {
+uint64_t devid:24;  /* Requester Id, AKA device_id */
+uint64_t pasid:20;  /* Process Address Space ID */
+uint64_t __rfu:20;  /* reserved */
+uint64_t tc;/* Translation Control */
+uint64_t ta;/* Translation Attributes */
+uint64_t msi_addr_mask; /* MSI filtering - address mask */
+uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
+uint64_t msiptp;/* MSI redirection page table pointer */
+};
+
+/* IOMMU index for transactions without PASID specified. */
+#define RISCV_IOMMU_NOPASID 0
+
+static void