VMID0 decoding has been tested and works fine.  VMID1-15 have not been tried yet.

Signed-off-by: Tom St Denis <[email protected]>
---
 src/lib/read_vram.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 4 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index e2087a252c10..4c74f4521857 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t size, 
void *dst)
        return -1;
 }
 
-
 static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t 
address, uint32_t size, void *dst)
 {
        uint64_t start_addr, page_table_start_addr, page_table_base_addr,
@@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t 
vmid, uint64_t addre
                if (page_table_depth == 1) {
                        // decode addr into pte and pde selectors...
                        pde_idx = (address >> (12 + 9 + page_table_size)) & 
((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
-                       pte_idx = (address >> 12) & ((1ULL << (9 + 
page_table_size)) - 1);
+                       pte_idx = (address >> (12 + page_table_size - 4)) & 
((1ULL << (9 + page_table_size)) - 1);
 
                        // read PDE entry
                        umr_read_vram(asic, 0xFFFF, page_table_base_addr + 
pde_idx * 8, 8, &pde_entry);
@@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic, 
uint32_t vmid, uint64_t addre
        return 0;
 }
 
+/*
+ * Read `size` bytes at GPU virtual `address` of VM context `vmid` into `dst`,
+ * translating through the page table described by the mmVM_CONTEXT<vmid>_*
+ * registers.  The copy is split so no chunk crosses a 4KB boundary; a chunk is
+ * fetched with umr_read_sram() when the PTE system bit is set, otherwise with
+ * umr_read_vram().  Returns 0 on success, -1 when a read fails.
+ */
+static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
+{
+       uint64_t start_addr, page_table_start_addr, page_table_base_addr,
+                page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
+                pde_address;
+       uint32_t chunk_size, tmp;
+       int page_table_depth, current_depth, first;
+       struct {
+               uint64_t frag_size, pte_base_addr, valid;
+       } pde_fields;
+       struct {
+               uint64_t page_base_addr, fragment, system, valid;
+       } pte_fields;
+       char buf[64];
+       unsigned char *pdst = dst;
+
+       /*
+        * PTE fields decoded below: 47:12 page base, 11:7 fragment, 1 system, 0 valid
+        * PDE fields decoded below: 63:59 fragment size, 47:12 PTE base, 0 valid
+        * NOTE(review): confirm the full entry layout against the AI documentation
+        */
+
+       // read vm registers
+       snprintf(buf, sizeof buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
+       page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+       snprintf(buf, sizeof buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
+       page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+
+       snprintf(buf, sizeof buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
+       tmp = umr_read_reg_by_name(asic, buf);
+       page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
+       page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
+
+       snprintf(buf, sizeof buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
+       page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf);
+       snprintf(buf, sizeof buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
+       page_table_base_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+
+       DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
+       DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
+       DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
+       DEBUG("BASE_SIZE = %lu\n", (unsigned long)page_table_size);
+       DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+       address -= page_table_start_addr;
+
+       // AI+ allows 0=default (4KB) whereas VI requires it to be explicitly set to >=4
+       page_table_size = page_table_size ? page_table_size : 4;
+
+       // with PDE levels in use BASE_ADDR is a page address, not a PDE entry, so shift
+       // it out; done once here because repeating it for every chunk corrupts the base
+       if (page_table_depth >= 1)
+               page_table_base_addr <<= 12;
+
+       while (size) {
+               if (page_table_depth >= 1) {
+                       pte_idx = (address >> (12 + page_table_size - 4)) & ((1ULL << (9 + page_table_size)) - 1);
+
+                       // AI+ supports more than 1 level of PDEs so we iterate for all of the depths;
+                       // the walk uses per-chunk copies so every chunk restarts from the top level
+                       pde_address = address;
+                       current_depth = page_table_depth;
+                       first = 1;
+                       while (current_depth) {
+                               // decode addr into pte and pde selectors...
+                               pde_idx = (pde_address >> (current_depth*9 + (12 + page_table_size - 4)));
+
+                               // don't mask the first PDE idx
+                               if (!first)
+                                       pde_idx &= (1ULL << 9) - 1;
+                               first = 0;
+
+                               // read PDE entry
+                               if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry) < 0) {
+                                       fprintf(stderr, "[ERROR] Cannot read PDE entry from VRAM\n");
+                                       return -1;
+                               }
+
+                               // decode PDE values
+                               pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
+                               pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
+                               pde_fields.valid         = pde_entry & 1;
+                               DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                               // for the next round the address we're decoding is the phys address in the currently decoded PDE
+                               // NOTE(review): the next level is still read relative to page_table_base_addr; verify for depth > 1
+                               --current_depth;
+                               pde_address = pde_fields.pte_base_addr;
+                       }
+
+                       // now read PTE entry for this page
+                       if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0) {
+                               fprintf(stderr, "[ERROR] Cannot read PTE entry from VRAM\n");
+                               return -1;
+                       }
+
+                       // decode PTE values
+                       pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
+                       pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
+                       pte_fields.system         = (pte_entry >> 1) & 1;
+                       pte_fields.valid          = pte_entry & 1;
+                       DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+
+                       // compute starting address
+                       start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+               } else {
+                       // in AI+ the BASE_ADDR is treated like a PDE entry...
+                       // decode PDE values
+                       pde_idx = 0; // unused
+                       pde_fields.frag_size     = (page_table_base_addr >> 59) & 0x1F;
+                       pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL;
+                       pde_fields.valid         = page_table_base_addr & 1;
+                       DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                       // PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
+                       pte_idx = (address >> (12 + pde_fields.frag_size));
+
+                       if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) {
+                               fprintf(stderr, "[ERROR] Cannot read PTE entry from VRAM\n");
+                               return -1;
+                       }
+
+                       // decode PTE values (47:12 address mask, same as the multi-level path above)
+                       pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
+                       pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
+                       pte_fields.system         = (pte_entry >> 1) & 1;
+                       pte_fields.valid          = pte_entry & 1;
+                       DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+
+                       // compute starting address
+                       start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+               }
+
+               // read up to 4K from it
+               // TODO: Support page sizes >4KB
+               if (((start_addr & 0xFFF) + size) & ~0xFFF) {
+                       chunk_size = 0x1000 - (start_addr & 0xFFF);
+               } else {
+                       chunk_size = size;
+               }
+               DEBUG("Computed address we will read from: %s:%llx (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long long)start_addr, (unsigned long)chunk_size);
+               if (pte_fields.system) {
+                       if (umr_read_sram(start_addr, chunk_size, pdst) < 0) {
+                               fprintf(stderr, "[ERROR] Cannot read system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+                               fprintf(stderr, "[ERROR] Alternatively download and install /dev/fmem\n");
+                               return -1;
+                       }
+               } else {
+                       if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+                               fprintf(stderr, "[ERROR] Cannot read from VRAM\n");
+                               return -1;
+                       }
+               }
+               pdst += chunk_size;
+               size -= chunk_size;
+               address += chunk_size;
+       }
+       return 0;
+}
 
 int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, 
uint32_t size, void *dst)
 {
@@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, 
uint64_t address, uint32
                return 0;
        }
 
-       if (asic->family == FAMILY_VI)
-               return umr_read_vram_vi(asic, vmid, address, size, dst);
+       switch (asic->family) {
+               case FAMILY_VI:
+                       return umr_read_vram_vi(asic, vmid, address, size, dst);
+               case FAMILY_AI:
+                       return umr_read_vram_ai(asic, vmid, address, size, dst);
+               default:
+                       fprintf(stderr, "[BUG] Unsupported ASIC family type for 
umr_read_vram()\n");
+                       return -1;
+       }
 
        return 0;
 }
-- 
2.12.0

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to