On Wed, Aug 6, 2025 at 10:19 PM wei li <liwei1...@gmail.com> wrote:
>
>
>
> <guo...@kernel.org> 于2025年8月5日周二 01:12写道:
>>
>> From: "Guo Ren (Alibaba DAMO Academy)" <guo...@kernel.org>
>>
>> Current implementation is wrong when iohgatp != bare. The RISC-V
>> IOMMU specification has defined that the PDT is based on GPA, not
>> SPA. So this patch fixes the problem, making PDT walk correctly
>> when the G-stage table walk is enabled.
>>
>> Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation")
>> Cc: qemu-sta...@nongnu.org
>> Cc: Sebastien Boeuf <s...@rivosinc.com>
>> Cc: Tomasz Jeznach <tjezn...@rivosinc.com>
>> Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guo...@kernel.org>
>> ---
>> Changes in V2:
>>  - Remove nested param to make patch clearer.
>>
>>  hw/riscv/riscv-iommu.c | 141 ++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 139 insertions(+), 2 deletions(-)
>>
> LGTM.
>
> "3.3.2. Process to locate the Process-context
>
> The device-context provides the PDT root page PPN (pdtp.ppn). When 
> DC.iohgatp.mode is not
>
> Bare, pdtp.PPN as well as pdte.PPN are Guest Physical Addresses (GPA) which 
> must be translated
>
> into Supervisor Physical Addresses (SPA) using the second-stage page table 
> pointed to by
>
> DC.iohgatp. The memory accesses to the PDT are treated as implicit read 
> memory accesses by the
>
> second-stage."
>
> Reviewed-by: Weiwei Li <liwei1...@gmail.com>
Thanks for quoting the specification wording. I will add this quotation to
the commit log in the next version.

>
> Regards,
>
> Weiwei Li
>
>
>> diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
>> index 96a7fbdefcf3..ded3f7b2fdce 100644
>> --- a/hw/riscv/riscv-iommu.c
>> +++ b/hw/riscv/riscv-iommu.c
>> @@ -866,6 +866,143 @@ static bool 
>> riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
>>      return true;
>>  }
>>
>> +/**
>> + * pdt_memory_read: PDT wrapper of dma_memory_read.
>> + *
>> + * @s: IOMMU Device State
>> + * @ctx: Device Translation Context with devid and pasid set
>> + * @addr: address within that address space
>> + * @buf: buffer with the data transferred
>> + * @len: length of the data transferred
>> + * @attrs: memory transaction attributes
>> + */
>> +static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
>> +                                   RISCVIOMMUContext *ctx,
>> +                                   dma_addr_t addr,
>> +                                   void *buf, dma_addr_t len,
>> +                                   MemTxAttrs attrs)
>> +{
>> +    uint64_t gatp_mode, pte;
>> +    struct {
>> +        unsigned char step;
>> +        unsigned char levels;
>> +        unsigned char ptidxbits;
>> +        unsigned char ptesize;
>> +    } sc;
>> +    MemTxResult ret;
>> +    dma_addr_t base = addr;
>> +
>> +    /* G stages translation mode */
>> +    gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
>> +    if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE)
>> +        goto out;
>> +
>> +    /* G stages translation tables root pointer */
>> +    base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
>> +
>> +    /* Start at step 0 */
>> +    sc.step = 0;
>> +
>> +    if (s->fctl & RISCV_IOMMU_FCTL_GXL) {
>> +        /* 32bit mode for GXL == 1 */
>> +        switch (gatp_mode) {
>> +        case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
>> +            if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) {
>> +                return MEMTX_ACCESS_ERROR;
>> +            }
>> +            sc.levels    = 2;
>> +            sc.ptidxbits = 10;
>> +            sc.ptesize   = 4;
>> +            break;
>> +        default:
>> +            return MEMTX_ACCESS_ERROR;
>> +        }
>> +    } else {
>> +        /* 64bit mode for GXL == 0 */
>> +        switch (gatp_mode) {
>> +        case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
>> +            if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) {
>> +                return MEMTX_ACCESS_ERROR;
>> +            }
>> +            sc.levels    = 3;
>> +            sc.ptidxbits = 9;
>> +            sc.ptesize   = 8;
>> +            break;
>> +        case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
>> +            if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) {
>> +                return MEMTX_ACCESS_ERROR;
>> +            }
>> +            sc.levels    = 4;
>> +            sc.ptidxbits = 9;
>> +            sc.ptesize   = 8;
>> +            break;
>> +        case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
>> +            if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) {
>> +                return MEMTX_ACCESS_ERROR;
>> +            }
>> +            sc.levels    = 5;
>> +            sc.ptidxbits = 9;
>> +            sc.ptesize   = 8;
>> +            break;
>> +        default:
>> +            return MEMTX_ACCESS_ERROR;
>> +        }
>> +    }
>> +
>> +    do {
>> +        const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
>> +        const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
>> +                                 (sc.levels - 1 - sc.step);
>> +        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
>> +        const dma_addr_t pte_addr = base + idx * sc.ptesize;
>> +
>> +        /* Address range check before first level lookup */
>> +        if (!sc.step) {
>> +            const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
>> +            if ((addr & va_mask) != addr) {
>> +                return MEMTX_ACCESS_ERROR;
>> +            }
>> +        }
>> +
>> +        /* Read page table entry */
>> +        if (sc.ptesize == 4) {
>> +            uint32_t pte32 = 0;
>> +            ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
>> +            pte = pte32;
>> +        } else {
>> +            ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
>> +        }
>> +        if (ret != MEMTX_OK)
>> +            return ret;
>> +
>> +        sc.step++;
>> +        hwaddr ppn = pte >> PTE_PPN_SHIFT;
>> +
>> +        if (!(pte & PTE_V)) {
>> +            return MEMTX_ACCESS_ERROR; /* Invalid PTE */
>> +        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
>> +            return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
>> +        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
>> +            return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + 
>> PTE_X */
>> +        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
>> +            return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
>> +        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
>> +            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
>> +        } else {
>> +            /* Leaf PTE, translation completed. */
>> +            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
>> +            break;
>> +        }
>> +
>> +        if (sc.step == sc.levels) {
>> +            return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
>> +        }
>> +    } while (1);
>> +
>> +out:
>> +    return dma_memory_read(s->target_as, base, buf, len, attrs);
>> +}
>> +
>>  /*
>>   * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
>>   *
>> @@ -1038,7 +1175,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, 
>> RISCVIOMMUContext *ctx)
>>           */
>>          const int split = depth * 9 + 8;
>>          addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
>> -        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
>> +        if (pdt_memory_read(s, ctx, addr, &de, sizeof(de),
>>                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
>>              return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
>>          }
>> @@ -1053,7 +1190,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, 
>> RISCVIOMMUContext *ctx)
>>
>>      /* Leaf entry in PDT */
>>      addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
>> -    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
>> +    if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2,
>>                          MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
>>          return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
>>      }
>> --
>> 2.40.1
>>


-- 
Best Regards
 Guo Ren

Reply via email to