On Wed, Aug 6, 2025 at 10:19 PM wei li <liwei1...@gmail.com> wrote: > > > > <guo...@kernel.org> 于2025年8月5日周二 01:12写道: >> >> From: "Guo Ren (Alibaba DAMO Academy)" <guo...@kernel.org> >> >> Current implementation is wrong when iohgatp != bare. The RISC-V >> IOMMU specification has defined that the PDT is based on GPA, not >> SPA. So this patch fixes the problem, making PDT walk correctly >> when the G-stage table walk is enabled. >> >> Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation") >> Cc: qemu-sta...@nongnu.org >> Cc: Sebastien Boeuf <s...@rivosinc.com> >> Cc: Tomasz Jeznach <tjezn...@rivosinc.com> >> Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guo...@kernel.org> >> --- >> Changes in V2: >> - Remove nested param to make patch clearer. >> >> hw/riscv/riscv-iommu.c | 141 ++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 139 insertions(+), 2 deletions(-) >> > LGTM. > > "3.3.2. Process to locate the Process-context > > The device-context provides the PDT root page PPN (pdtp.ppn). When > DC.iohgatp.mode is not > > Bare, pdtp.PPN as well as pdte.PPN are Guest Physical Addresses (GPA) which > must be translated > > into Supervisor Physical Addresses (SPA) using the second-stage page table > pointed to by > > DC.iohgatp. The memory accesses to the PDT are treated as implicit read > memory accesses by the > > second-stage." > > Reviewed-by: Weiwei Li <liwei1...@gmail.com> Thanks for mentioning the spec words. I would add this quotation to the commit log of the next version.
> > Regards, > > Weiwei Li > > >> diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c >> index 96a7fbdefcf3..ded3f7b2fdce 100644 >> --- a/hw/riscv/riscv-iommu.c >> +++ b/hw/riscv/riscv-iommu.c >> @@ -866,6 +866,143 @@ static bool >> riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, >> return true; >> } >> >> +/** >> + * pdt_memory_read: PDT wrapper of dma_memory_read. >> + * >> + * @s: IOMMU Device State >> + * @ctx: Device Translation Context with devid and pasid set >> + * @addr: address within that address space >> + * @buf: buffer with the data transferred >> + * @len: length of the data transferred >> + * @attrs: memory transaction attributes >> + */ >> +static MemTxResult pdt_memory_read(RISCVIOMMUState *s, >> + RISCVIOMMUContext *ctx, >> + dma_addr_t addr, >> + void *buf, dma_addr_t len, >> + MemTxAttrs attrs) >> +{ >> + uint64_t gatp_mode, pte; >> + struct { >> + unsigned char step; >> + unsigned char levels; >> + unsigned char ptidxbits; >> + unsigned char ptesize; >> + } sc; >> + MemTxResult ret; >> + dma_addr_t base = addr; >> + >> + /* G stages translation mode */ >> + gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); >> + if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) >> + goto out; >> + >> + /* G stages translation tables root pointer */ >> + base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); >> + >> + /* Start at step 0 */ >> + sc.step = 0; >> + >> + if (s->fctl & RISCV_IOMMU_FCTL_GXL) { >> + /* 32bit mode for GXL == 1 */ >> + switch (gatp_mode) { >> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: >> + if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { >> + return MEMTX_ACCESS_ERROR; >> + } >> + sc.levels = 2; >> + sc.ptidxbits = 10; >> + sc.ptesize = 4; >> + break; >> + default: >> + return MEMTX_ACCESS_ERROR; >> + } >> + } else { >> + /* 64bit mode for GXL == 0 */ >> + switch (gatp_mode) { >> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: >> + if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { >> + return MEMTX_ACCESS_ERROR; >> + } >> + sc.levels = 3; >> + sc.ptidxbits = 9; >> + sc.ptesize = 8; >> + break; >> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: >> + if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { >> + return MEMTX_ACCESS_ERROR; >> + } >> + sc.levels = 4; >> + sc.ptidxbits = 9; >> + sc.ptesize = 8; >> + break; >> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: >> + if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { >> + return MEMTX_ACCESS_ERROR; >> + } >> + sc.levels = 5; >> + sc.ptidxbits = 9; >> + sc.ptesize = 8; >> + break; >> + default: >> + return MEMTX_ACCESS_ERROR; >> + } >> + } >> + >> + do { >> + const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits; >> + const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits * >> + (sc.levels - 1 - sc.step); >> + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); >> + const dma_addr_t pte_addr = base + idx * sc.ptesize; >> + >> + /* Address range check before first level lookup */ >> + if (!sc.step) { >> + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; >> + if ((addr & va_mask) != addr) { >> + return MEMTX_ACCESS_ERROR; >> + } >> + } >> + >> + /* Read page table entry */ >> + if (sc.ptesize == 4) { >> + uint32_t pte32 = 0; >> + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs); >> + pte = pte32; >> + } else { >> + ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs); >> + } >> + if (ret != MEMTX_OK) >> + return ret; >> + >> + sc.step++; >> + hwaddr ppn = pte >> PTE_PPN_SHIFT; >> + >> + if (!(pte & PTE_V)) { >> + return MEMTX_ACCESS_ERROR; /* Invalid PTE */ >> + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { >> + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */ >> + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { >> + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + >> PTE_X */ >> + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { >> + return MEMTX_ACCESS_ERROR; /* Misaligned PPN */ >> + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { >> + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ >> + } else { >> + /* Leaf PTE, translation completed. */ >> + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); >> + break; >> + } >> + >> + if (sc.step == sc.levels) { >> + return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */ >> + } >> + } while (1); >> + >> +out: >> + return dma_memory_read(s->target_as, base, buf, len, attrs); >> +} >> + >> /* >> * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk >> * >> @@ -1038,7 +1175,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, >> RISCVIOMMUContext *ctx) >> */ >> const int split = depth * 9 + 8; >> addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; >> - if (dma_memory_read(s->target_as, addr, &de, sizeof(de), >> + if (pdt_memory_read(s, ctx, addr, &de, sizeof(de), >> MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { >> return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; >> } >> @@ -1053,7 +1190,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, >> RISCVIOMMUContext *ctx) >> >> /* Leaf entry in PDT */ >> addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; >> - if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, >> + if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2, >> MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { >> return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; >> } >> -- >> 2.40.1 >> -- Best Regards Guo Ren