Update: V2 posted with a cleaner modification and less impact on existing code: https://lists.nongnu.org/archive/html/qemu-devel/2025-08/msg00738.html
On Sun, Aug 3, 2025 at 12:47 PM <guo...@kernel.org> wrote: > > From: "Guo Ren (Alibaba DAMO Academy)" <guo...@kernel.org> > > Current implementation is wrong when iohgatp != bare. The RISC-V > IOMMU specification has defined that the PDT is based on GPA, not > SPA. So this patch fixes the problem, making PDT walk correctly > when the G-stage table walk is enabled. > > Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation") > Cc: qemu-sta...@nongnu.org > Cc: Sebastien Boeuf <s...@rivosinc.com> > Cc: Tomasz Jeznach <tjezn...@rivosinc.com> > Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guo...@kernel.org> > --- > hw/riscv/riscv-iommu.c | 148 ++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 146 insertions(+), 2 deletions(-) > > diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c > index 96a7fbdefcf3..c4dccc9e5c5d 100644 > --- a/hw/riscv/riscv-iommu.c > +++ b/hw/riscv/riscv-iommu.c > @@ -866,6 +866,145 @@ static bool > riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, > return true; > } > > +/** > + * pdt_memory_read: PDT wrapper of dma_memory_read. 
> + * > + * @s: IOMMU Device State > + * @ctx: Device Translation Context with devid and pasid set > + * nested: Add G-stage translation or not > + * @addr: address within that address space > + * @buf: buffer with the data transferred > + * @len: length of the data transferred > + * @attrs: memory transaction attributes > + */ > +static MemTxResult pdt_memory_read(RISCVIOMMUState *s, > + RISCVIOMMUContext *ctx, bool nested, > + dma_addr_t addr, > + void *buf, dma_addr_t len, > + MemTxAttrs attrs) > +{ > + uint64_t gatp_mode, pte; > + struct { > + unsigned char step; > + unsigned char levels; > + unsigned char ptidxbits; > + unsigned char ptesize; > + } sc; > + MemTxResult ret; > + dma_addr_t base = addr; > + > + if (!nested) > + goto out; > + > + /* G stages translation mode */ > + gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); > + > + /* G stages translation tables root pointer */ > + base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); > + > + /* Start at step 0 */ > + sc.step = 0; > + > + if (s->fctl & RISCV_IOMMU_FCTL_GXL) { > + /* 32bit mode for GXL == 1 */ > + switch (gatp_mode) { > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: > + if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { > + return MEMTX_ACCESS_ERROR; > + } > + sc.levels = 2; > + sc.ptidxbits = 10; > + sc.ptesize = 4; > + break; > + default: > + return MEMTX_ACCESS_ERROR; > + } > + } else { > + /* 64bit mode for GXL == 0 */ > + switch (gatp_mode) { > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: > + if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { > + return MEMTX_ACCESS_ERROR; > + } > + sc.levels = 3; > + sc.ptidxbits = 9; > + sc.ptesize = 8; > + break; > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: > + if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { > + return MEMTX_ACCESS_ERROR; > + } > + sc.levels = 4; > + sc.ptidxbits = 9; > + sc.ptesize = 8; > + break; > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: > + if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { > + return MEMTX_ACCESS_ERROR; > + } > + sc.levels = 
5; > + sc.ptidxbits = 9; > + sc.ptesize = 8; > + break; > + default: > + return MEMTX_ACCESS_ERROR; > + } > + } > + > + do { > + const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits; > + const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits * > + (sc.levels - 1 - sc.step); > + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); > + const dma_addr_t pte_addr = base + idx * sc.ptesize; > + > + /* Address range check before first level lookup */ > + if (!sc.step) { > + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; > + if ((addr & va_mask) != addr) { > + return MEMTX_ACCESS_ERROR; > + } > + } > + > + /* Read page table entry */ > + if (sc.ptesize == 4) { > + uint32_t pte32 = 0; > + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs); > + pte = pte32; > + } else { > + ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs); > + } > + if (ret != MEMTX_OK) > + return ret; > + > + sc.step++; > + hwaddr ppn = pte >> PTE_PPN_SHIFT; > + > + if (!(pte & PTE_V)) { > + return MEMTX_ACCESS_ERROR; /* Invalid PTE */ > + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { > + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */ > + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { > + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + > PTE_X */ > + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { > + return MEMTX_ACCESS_ERROR; /* Misaligned PPN */ > + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { > + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ > + } else { > + /* Leaf PTE, translation completed. 
*/ > + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); > + break; > + } > + > + if (sc.step == sc.levels) { > + return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */ > + } > + } while (1); > + > +out: > + return dma_memory_read(s->target_as, base, buf, len, attrs); > +} > + > /* > * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk > * > @@ -884,6 +1023,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > RISCVIOMMUContext *ctx) > const size_t dc_len = sizeof(dc) >> dc_fmt; > int depth; > uint64_t de; > + bool pdt_nested = false; > > switch (mode) { > case RISCV_IOMMU_DDTP_MODE_OFF: > @@ -1029,6 +1169,10 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > RISCVIOMMUContext *ctx) > return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; > } > > + /* Detect nested PDT walk */ > + pdt_nested = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD) != > + RISCV_IOMMU_DC_IOHGATP_MODE_BARE; > + > for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { > riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); > > @@ -1038,7 +1182,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > RISCVIOMMUContext *ctx) > */ > const int split = depth * 9 + 8; > addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; > - if (dma_memory_read(s->target_as, addr, &de, sizeof(de), > + if (pdt_memory_read(s, ctx, pdt_nested, addr, &de, sizeof(de), > MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > } > @@ -1053,7 +1197,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > RISCVIOMMUContext *ctx) > > /* Leaf entry in PDT */ > addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; > - if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, > + if (pdt_memory_read(s, ctx, pdt_nested, addr, &dc.ta, sizeof(uint64_t) * > 2, > MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > } > -- > 2.40.1 > -- Best Regards Guo Ren