The more I think about it, the more I wonder if

// AI+ allows 0=default (4KB) whereas VI requires it to be explictly set to >=4
        page_table_size = page_table_size ? page_table_size : 4;

Should instead be

        page_table_size += 4;

Since in the later code

pde_idx = (pde_address >> (page_table_depth*9 + (12 + page_table_size - 4)));

Would result in a 9-bit page size if, say, page_table_size was 1, when it should actually be 13 bits (8KB).

Tom

On 06/04/17 02:14 PM, Tom St Denis wrote:
On 06/04/17 02:11 PM, Deucher, Alexander wrote:
-----Original Message-----
From: amd-gfx [mailto:[email protected]] On Behalf
Of Tom St Denis
Sent: Thursday, April 06, 2017 1:53 PM
To: [email protected]
Cc: StDenis, Tom
Subject: [PATCH umr] Add initial AI VM decoding

Tested with VMID0 decodings just fine.  Haven't tried VMID1-15 yet.

Signed-off-by: Tom St Denis <[email protected]>

Looks reasonable:
Acked-by: Alex Deucher <[email protected]>

Thanks.  What I'm leery about is the page_table_size.  On VI it's always
4KB (with our kernel) so the logic isn't vetted much in umr.

Then, to add to it, it seems the AI+ interpretation is a bit different, with
0==4KB (instead of 4).  I presume values greater than 0 are for
multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries), e.g. 1 =
8KB of next-level entries, etc.

I'll try adding a stall to libdrm's GFX test again and see if the VMID
= 1 decoding works in the meantime.

Cheers,
Tom




---
 src/lib/read_vram.c | 180
++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 4 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index e2087a252c10..4c74f4521857 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t
size, void *dst)
       return -1;
 }

-
 static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid,
uint64_t
address, uint32_t size, void *dst)
 {
       uint64_t start_addr, page_table_start_addr, page_table_base_addr,
@@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic,
uint32_t vmid, uint64_t addre
               if (page_table_depth == 1) {
                       // decode addr into pte and pde selectors...
                       pde_idx = (address >> (12 + 9 +
page_table_size)) &
((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
-                     pte_idx = (address >> 12) & ((1ULL << (9 +
page_table_size)) - 1);
+                     pte_idx = (address >> (12 + page_table_size -
4)) &
((1ULL << (9 + page_table_size)) - 1);

                       // read PDE entry
                       umr_read_vram(asic, 0xFFFF, page_table_base_addr
+ pde_idx * 8, 8, &pde_entry);
@@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic,
uint32_t vmid, uint64_t addre
       return 0;
 }

+static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid,
uint64_t
address, uint32_t size, void *dst)
+{
+     uint64_t start_addr, page_table_start_addr, page_table_base_addr,
+              page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
+              pde_address;
+     uint32_t chunk_size, tmp;
+     int page_table_depth, first;
+     struct {
+             uint64_t
+                     frag_size,
+                     pte_base_addr,
+                     valid;
+     } pde_fields;
+     struct {
+             uint64_t
+                     page_base_addr,
+                     fragment,
+                     system,
+                     valid;
+     } pte_fields;
+     char buf[64];
+     unsigned char *pdst = dst;
+
+     /*
+      * PTE format on VI:
+      * 63:40 reserved
+      * 39:12 4k physical page base address
+      * 11:7 fragment
+      * 6 write
+      * 5 read
+      * 4 exe
+      * 3 reserved
+      * 2 snooped
+      * 1 system
+      * 0 valid
+      *
+      * PDE format on VI:
+      * 63:59 block fragment size
+      * 58:40 reserved
+      * 39:1 physical base address of PTE
+      * bits 5:1 must be 0.
+      * 0 valid
+      */
+
+     // read vm registers
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
+             page_table_start_addr =
(uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
+             page_table_start_addr |=
(uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+
+     sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
+             tmp = umr_read_reg_by_name(asic, buf);
+             page_table_depth      = umr_bitslice_reg_by_name(asic,
buf,
"PAGE_TABLE_DEPTH", tmp);
+             page_table_size       = umr_bitslice_reg_by_name(asic,
buf,
"PAGE_TABLE_BLOCK_SIZE", tmp);
+
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
+             page_table_base_addr  =
(uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
+             page_table_base_addr  |=
(uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+
+     DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
+     DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long
long)page_table_start_addr);
+     DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long
long)page_table_base_addr);
+     DEBUG("BASE_SIZE = %lu\n", page_table_size);
+     DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+     address -= page_table_start_addr;
+
+     // AI+ allows 0=default (4KB) whereas VI requires it to be
explictly set
to >=4
+     page_table_size = page_table_size ? page_table_size : 4;
+
+     first = 1;
+     while (size) {
+             if (page_table_depth >= 1) {
+                     // page_table_base_addr is not a PDE entry in this
config so shift it out (it's a page address)
+                     page_table_base_addr <<= 12;
+                     pte_idx = (address >> (12 + page_table_size -
4)) &
((1ULL << (9 + page_table_size)) - 1);
+
+                     // AI+ supports more than 1 level of PDEs so we
iterate for all of the depths
+                     pde_address = address;
+                     while (page_table_depth) {
+                             // decode addr into pte and pde
selectors...
+                             pde_idx = (pde_address >>
(page_table_depth*9 + (12 + page_table_size - 4)));
+
+                             // don't mask the first PDE idx
+                             if (!first)
+                                     pde_idx &= (1ULL << 9) - 1;
+                             first = 0;
+
+                             // read PDE entry
+                             umr_read_vram(asic, 0xFFFF,
page_table_base_addr + pde_idx * 8, 8, &pde_entry);
+
+                             // decode PDE values
+                             pde_fields.frag_size     = (pde_entry
>> 59) &
0x1F;
+                             pde_fields.pte_base_addr = pde_entry &
0xFFFFFFFFF000ULL;
+                             pde_fields.valid         = pde_entry & 1;
+                             DEBUG("pde_idx=%llx, frag_size=%u,
pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
(unsigned)pde_fields.frag_size, (unsigned long
long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                             // for the next round the address we're
decoding is the phys address in the currently decoded PDE
+                             --page_table_depth;
+                             pde_address = pde_fields.pte_base_addr;
+                     }
+
+                     // now read PTE entry for this page
+                     umr_read_vram(asic, 0xFFFF,
pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
+
+                     // decode PTE values
+                     pte_fields.page_base_addr = pte_entry &
0xFFFFFFFFF000ULL;
+                     pte_fields.fragment       = (pte_entry >> 7)  &
0x1F;
+                     pte_fields.system         = (pte_entry >> 1) & 1;
+                     pte_fields.valid          = pte_entry & 1;
+                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
(unsigned long long)pte_fields.page_base_addr,
(unsigned)pte_fields.fragment, (int)pte_fields.system,
(int)pte_fields.valid);
+
+                     // compute starting address
+                     start_addr = pte_fields.page_base_addr + (address
& 0xFFF);
+             } else {
+                     // in AI+ the BASE_ADDR is treated like a PDE
entry...
+                     // decode PDE values
+                     pde_idx = 0; // unused
+                     pde_fields.frag_size     =
(page_table_base_addr >>
59) & 0x1F;
+                     pde_fields.pte_base_addr = page_table_base_addr
& 0xFFFFFFFFF000ULL;
+                     pde_fields.valid         = page_table_base_addr
& 1;
+                     DEBUG("pde_idx=%llx, frag_size=%u,
pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
(unsigned)pde_fields.frag_size, (unsigned long
long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                     // PTE addr = baseaddr[47:6] + (logical -
start) >>
fragsize)
+                     pte_idx = (address >> (12 +
pde_fields.frag_size));
+
+                     umr_read_vram(asic, 0xFFFF,
pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
+
+                     // decode PTE values
+                     pte_fields.page_base_addr = pte_entry &
0xFFFFFFFF000ULL;
+                     pte_fields.fragment       = (pte_entry >> 7)  &
0x1F;
+                     pte_fields.system         = (pte_entry >> 1) & 1;
+                     pte_fields.valid          = pte_entry & 1;
+                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
(unsigned long long)pte_fields.page_base_addr,
(unsigned)pte_fields.fragment, (int)pte_fields.system,
(int)pte_fields.valid);
+
+                     // compute starting address
+                     start_addr = pte_fields.page_base_addr + (address
& 0xFFF);
+             }
+
+             // read upto 4K from it
+             // TODO: Support page sizes >4KB
+             if (((start_addr & 0xFFF) + size) & ~0xFFF) {
+                     chunk_size = 0x1000 - (start_addr & 0xFFF);
+             } else {
+                     chunk_size = size;
+             }
+             DEBUG("Computed address we will read from: %s:%llx
(reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram",
(unsigned long
long)start_addr, (unsigned long)chunk_size);
+             if (pte_fields.system) {
+                     if (umr_read_sram(start_addr, chunk_size, pdst)
< 0)
{
+                             fprintf(stderr, "[ERROR] Cannot read
system
ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+                             fprintf(stderr, "[ERROR] Alternatively
download and install /dev/fmem\n");
+                             return -1;
+                     }
+             } else {
+                     if (umr_read_vram(asic, 0xFFFF, start_addr,
chunk_size, pdst) < 0) {
+                             fprintf(stderr, "[ERROR] Cannot read from
VRAM\n");
+                             return -1;
+                     }
+             }
+             pdst += chunk_size;
+             size -= chunk_size;
+             address += chunk_size;
+     }
+     return 0;
+}

 int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t
address,
uint32_t size, void *dst)
 {
@@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t
vmid, uint64_t address, uint32
               return 0;
       }

-     if (asic->family == FAMILY_VI)
-             return umr_read_vram_vi(asic, vmid, address, size, dst);
+     switch (asic->family) {
+             case FAMILY_VI:
+                     return umr_read_vram_vi(asic, vmid, address, size,
dst);
+             case FAMILY_AI:
+                     return umr_read_vram_ai(asic, vmid, address, size,
dst);
+             default:
+                     fprintf(stderr, "[BUG] Unsupported ASIC family
type
for umr_read_vram()\n");
+                     return -1;
+     }

       return 0;
 }
--
2.12.0

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to