On Wed, Aug 6, 2025 at 4:58 PM Nutty Liu <liujin...@lanxincomputing.com> wrote: > > On 8/5/2025 1:12 AM, guo...@kernel.org wrote: > > From: "Guo Ren (Alibaba DAMO Academy)" <guo...@kernel.org> > > > > Current implementation is wrong when iohgatp != bare. The RISC-V > > IOMMU specification has defined that the PDT is based on GPA, not > > SPA. So this patch fixes the problem, making PDT walk correctly > > when the G-stage table walk is enabled. > > > > Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation") > > Cc: qemu-sta...@nongnu.org > > Cc: Sebastien Boeuf <s...@rivosinc.com> > > Cc: Tomasz Jeznach <tjezn...@rivosinc.com> > > Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guo...@kernel.org> > > --- > > Changes in V2: > > - Remove nested param to make patch clearer. > > > > hw/riscv/riscv-iommu.c | 141 ++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 139 insertions(+), 2 deletions(-) > > > > diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c > > index 96a7fbdefcf3..ded3f7b2fdce 100644 > > --- a/hw/riscv/riscv-iommu.c > > +++ b/hw/riscv/riscv-iommu.c > > @@ -866,6 +866,143 @@ static bool > > riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, > > return true; > > } > > > > +/** > > + * pdt_memory_read: PDT wrapper of dma_memory_read. > > + * > > + * @s: IOMMU Device State > > + * @ctx: Device Translation Context with devid and pasid set > > + * @addr: address within that address space > > + * @buf: buffer with the data transferred > > + * @len: length of the data transferred > > + * @attrs: memory transaction attributes > > + */ > > +static MemTxResult pdt_memory_read(RISCVIOMMUState *s, > > + RISCVIOMMUContext *ctx, > > + dma_addr_t addr, > > + void *buf, dma_addr_t len, > > + MemTxAttrs attrs) > > +{ > > + uint64_t gatp_mode, pte; > > + struct { > > + unsigned char step; > > + unsigned char levels; > > + unsigned char ptidxbits; > > + unsigned char ptesize; > > + } sc; > > + MemTxResult ret; > > + dma_addr_t base = addr; > > + > > + /* G stages translation mode */ > > + gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); > > + if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) > > + goto out; > > + > > + /* G stages translation tables root pointer */ > > + base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); > > + > > + /* Start at step 0 */ > > + sc.step = 0; > > + > > + if (s->fctl & RISCV_IOMMU_FCTL_GXL) { > > + /* 32bit mode for GXL == 1 */ > > + switch (gatp_mode) { > > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: > > + if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { > > + return MEMTX_ACCESS_ERROR; > > + } > > + sc.levels = 2; > > + sc.ptidxbits = 10; > > + sc.ptesize = 4; > > + break; > > + default: > > + return MEMTX_ACCESS_ERROR; > > + } > > + } else { > > + /* 64bit mode for GXL == 0 */ > > + switch (gatp_mode) { > > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: > > + if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { > > + return MEMTX_ACCESS_ERROR; > > + } > > + sc.levels = 3; > > + sc.ptidxbits = 9; > > + sc.ptesize = 8; > > + break; > > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: > > + if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { > > + return MEMTX_ACCESS_ERROR; > > + } > > + sc.levels = 4; > > + sc.ptidxbits = 9; > > + sc.ptesize = 8; > > + break; > > + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: > > + if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { > > + return MEMTX_ACCESS_ERROR; > > + } > > + sc.levels = 5; > > + sc.ptidxbits = 9; > > + sc.ptesize = 8; > > + break; > > + default: > > + return MEMTX_ACCESS_ERROR; > > + } > > + } > How about moving the variables of 'gatp_mode', 'base' and 'sc' out of > this wrapper function ? > Since all of them are the same except for 'sc.step' during the traversal > of PDT. This is a fixup patch, so I want to make the wrapper function handle all the necessary tasks to minimize the impact on other code. And the detection of them is low-cost, so no need to complicate the coding convention.
> > Otherwise, > Reviewed-by: Nutty Liu <liujin...@lanxincomputing.com> Thx. > > Thanks, > Nutty > > + > > + do { > > + const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits; > > + const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits * > > + (sc.levels - 1 - sc.step); > > + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); > > + const dma_addr_t pte_addr = base + idx * sc.ptesize; > > + > > + /* Address range check before first level lookup */ > > + if (!sc.step) { > > + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; > > + if ((addr & va_mask) != addr) { > > + return MEMTX_ACCESS_ERROR; > > + } > > + } > > + > > + /* Read page table entry */ > > + if (sc.ptesize == 4) { > > + uint32_t pte32 = 0; > > + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs); > > + pte = pte32; > > + } else { > > + ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs); > > + } > > + if (ret != MEMTX_OK) > > + return ret; > > + > > + sc.step++; > > + hwaddr ppn = pte >> PTE_PPN_SHIFT; > > + > > + if (!(pte & PTE_V)) { > > + return MEMTX_ACCESS_ERROR; /* Invalid PTE */ > > + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { > > + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */ > > + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { > > + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + > > PTE_X */ > > + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { > > + return MEMTX_ACCESS_ERROR; /* Misaligned PPN */ > > + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { > > + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ > > + } else { > > + /* Leaf PTE, translation completed. */ > > + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); > > + break; > > + } > > + > > + if (sc.step == sc.levels) { > > + return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */ > > + } > > + } while (1); > > + > > +out: > > + return dma_memory_read(s->target_as, base, buf, len, attrs); > > +} > > + > > /* > > * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk > > * > > @@ -1038,7 +1175,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > > RISCVIOMMUContext *ctx) > > */ > > const int split = depth * 9 + 8; > > addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; > > - if (dma_memory_read(s->target_as, addr, &de, sizeof(de), > > + if (pdt_memory_read(s, ctx, addr, &de, sizeof(de), > > MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > > return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > > } > > @@ -1053,7 +1190,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, > > RISCVIOMMUContext *ctx) > > > > /* Leaf entry in PDT */ > > addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; > > - if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, > > + if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2, > > MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > > return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > > } -- Best Regards Guo Ren