> Thanks.  What I'm leery about is the page_table_size.  On VI it's always
> 4KB (with our kernel) so the logic isn't vetted much in umr.

Actually, that assumption is completely incorrect.

On VI, the PT and PD sizes are usually much larger than 4KB because VI parts only support one extra level of page tables.

Christian.

Am 06.04.2017 um 20:19 schrieb Tom St Denis:
The more I think about it, the more I wonder if

// AI+ allows 0=default (4KB) whereas VI requires it to be explictly set to >=4
    page_table_size = page_table_size ? page_table_size : 4;

Should be instead

    page_table_size += 4;

Since in the later code

pde_idx = (pde_address >> (page_table_depth*9 + (12 + page_table_size - 4)));

Would result in a 9-bit page size if, say, page_table_size was 1, which should actually be 13 bits (8KB).

Tom

On 06/04/17 02:14 PM, Tom St Denis wrote:
On 06/04/17 02:11 PM, Deucher, Alexander wrote:
-----Original Message-----
From: amd-gfx [mailto:[email protected]] On Behalf
Of Tom St Denis
Sent: Thursday, April 06, 2017 1:53 PM
To: [email protected]
Cc: StDenis, Tom
Subject: [PATCH umr] Add initial AI VM decoding

Tested with VMID0 decodings just fine.  Haven't tried VMID1-15 yet.

Signed-off-by: Tom St Denis <[email protected]>

Looks reasonable:
Acked-by: Alex Deucher <[email protected]>

Thanks.  What I'm leery about is the page_table_size.  On VI it's always
4KB (with our kernel) so the logic isn't vetted much in umr.

Then, to add to it, it seems the AI+ interpretation is a bit different, with
0==4KB (instead of 4).  I presume values greater than 0 are for
multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries), e.g. 1 =
8KB of next-level entries, etc.

I'll try adding a stall to libdrm's GFX test again and see if the VMID
= 1 decoding works in the meantime.

Cheers,
Tom




---
 src/lib/read_vram.c | 180
++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 4 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index e2087a252c10..4c74f4521857 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t
size, void *dst)
       return -1;
 }

-
 static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid,
uint64_t
address, uint32_t size, void *dst)
 {
uint64_t start_addr, page_table_start_addr, page_table_base_addr,
@@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic,
uint32_t vmid, uint64_t addre
               if (page_table_depth == 1) {
                       // decode addr into pte and pde selectors...
                       pde_idx = (address >> (12 + 9 +
page_table_size)) &
((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
-                     pte_idx = (address >> 12) & ((1ULL << (9 +
page_table_size)) - 1);
+                     pte_idx = (address >> (12 + page_table_size -
4)) &
((1ULL << (9 + page_table_size)) - 1);

                       // read PDE entry
umr_read_vram(asic, 0xFFFF, page_table_base_addr
+ pde_idx * 8, 8, &pde_entry);
@@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic,
uint32_t vmid, uint64_t addre
       return 0;
 }

+static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid,
uint64_t
address, uint32_t size, void *dst)
+{
+ uint64_t start_addr, page_table_start_addr, page_table_base_addr, + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
+              pde_address;
+     uint32_t chunk_size, tmp;
+     int page_table_depth, first;
+     struct {
+             uint64_t
+                     frag_size,
+                     pte_base_addr,
+                     valid;
+     } pde_fields;
+     struct {
+             uint64_t
+                     page_base_addr,
+                     fragment,
+                     system,
+                     valid;
+     } pte_fields;
+     char buf[64];
+     unsigned char *pdst = dst;
+
+     /*
+      * PTE format on VI:
+      * 63:40 reserved
+      * 39:12 4k physical page base address
+      * 11:7 fragment
+      * 6 write
+      * 5 read
+      * 4 exe
+      * 3 reserved
+      * 2 snooped
+      * 1 system
+      * 0 valid
+      *
+      * PDE format on VI:
+      * 63:59 block fragment size
+      * 58:40 reserved
+      * 39:1 physical base address of PTE
+      * bits 5:1 must be 0.
+      * 0 valid
+      */
+
+     // read vm registers
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
+             page_table_start_addr =
(uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
+             page_table_start_addr |=
(uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+
+     sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
+             tmp = umr_read_reg_by_name(asic, buf);
+             page_table_depth      = umr_bitslice_reg_by_name(asic,
buf,
"PAGE_TABLE_DEPTH", tmp);
+             page_table_size       = umr_bitslice_reg_by_name(asic,
buf,
"PAGE_TABLE_BLOCK_SIZE", tmp);
+
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
+             page_table_base_addr  =
(uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+     sprintf(buf,
"mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
+             page_table_base_addr  |=
(uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+
+     DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
+     DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long
long)page_table_start_addr);
+     DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long
long)page_table_base_addr);
+     DEBUG("BASE_SIZE = %lu\n", page_table_size);
+     DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+     address -= page_table_start_addr;
+
+     // AI+ allows 0=default (4KB) whereas VI requires it to be
explictly set
to >=4
+     page_table_size = page_table_size ? page_table_size : 4;
+
+     first = 1;
+     while (size) {
+             if (page_table_depth >= 1) {
+ // page_table_base_addr is not a PDE entry in this
config so shift it out (it's a page address)
+                     page_table_base_addr <<= 12;
+                     pte_idx = (address >> (12 + page_table_size -
4)) &
((1ULL << (9 + page_table_size)) - 1);
+
+                     // AI+ supports more than 1 level of PDEs so we
iterate for all of the depths
+                     pde_address = address;
+                     while (page_table_depth) {
+                             // decode addr into pte and pde
selectors...
+                             pde_idx = (pde_address >>
(page_table_depth*9 + (12 + page_table_size - 4)));
+
+                             // don't mask the first PDE idx
+                             if (!first)
+                                     pde_idx &= (1ULL << 9) - 1;
+                             first = 0;
+
+                             // read PDE entry
+                             umr_read_vram(asic, 0xFFFF,
page_table_base_addr + pde_idx * 8, 8, &pde_entry);
+
+                             // decode PDE values
+                             pde_fields.frag_size     = (pde_entry
>> 59) &
0x1F;
+                             pde_fields.pte_base_addr = pde_entry &
0xFFFFFFFFF000ULL;
+ pde_fields.valid = pde_entry & 1;
+                             DEBUG("pde_idx=%llx, frag_size=%u,
pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
(unsigned)pde_fields.frag_size, (unsigned long
long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                             // for the next round the address we're
decoding is the phys address in the currently decoded PDE
+                             --page_table_depth;
+                             pde_address = pde_fields.pte_base_addr;
+                     }
+
+                     // now read PTE entry for this page
+                     umr_read_vram(asic, 0xFFFF,
pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
+
+                     // decode PTE values
+                     pte_fields.page_base_addr = pte_entry &
0xFFFFFFFFF000ULL;
+                     pte_fields.fragment       = (pte_entry >> 7)  &
0x1F;
+ pte_fields.system = (pte_entry >> 1) & 1;
+                     pte_fields.valid          = pte_entry & 1;
+                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
(unsigned long long)pte_fields.page_base_addr,
(unsigned)pte_fields.fragment, (int)pte_fields.system,
(int)pte_fields.valid);
+
+                     // compute starting address
+ start_addr = pte_fields.page_base_addr + (address
& 0xFFF);
+             } else {
+                     // in AI+ the BASE_ADDR is treated like a PDE
entry...
+                     // decode PDE values
+                     pde_idx = 0; // unused
+                     pde_fields.frag_size     =
(page_table_base_addr >>
59) & 0x1F;
+                     pde_fields.pte_base_addr = page_table_base_addr
& 0xFFFFFFFFF000ULL;
+                     pde_fields.valid         = page_table_base_addr
& 1;
+                     DEBUG("pde_idx=%llx, frag_size=%u,
pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
(unsigned)pde_fields.frag_size, (unsigned long
long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+                     // PTE addr = baseaddr[47:6] + (logical -
start) >>
fragsize)
+                     pte_idx = (address >> (12 +
pde_fields.frag_size));
+
+                     umr_read_vram(asic, 0xFFFF,
pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
+
+                     // decode PTE values
+                     pte_fields.page_base_addr = pte_entry &
0xFFFFFFFF000ULL;
+                     pte_fields.fragment       = (pte_entry >> 7)  &
0x1F;
+ pte_fields.system = (pte_entry >> 1) & 1;
+                     pte_fields.valid          = pte_entry & 1;
+                     DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
(unsigned long long)pte_fields.page_base_addr,
(unsigned)pte_fields.fragment, (int)pte_fields.system,
(int)pte_fields.valid);
+
+                     // compute starting address
+ start_addr = pte_fields.page_base_addr + (address
& 0xFFF);
+             }
+
+             // read upto 4K from it
+             // TODO: Support page sizes >4KB
+             if (((start_addr & 0xFFF) + size) & ~0xFFF) {
+                     chunk_size = 0x1000 - (start_addr & 0xFFF);
+             } else {
+                     chunk_size = size;
+             }
+             DEBUG("Computed address we will read from: %s:%llx
(reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram",
(unsigned long
long)start_addr, (unsigned long)chunk_size);
+             if (pte_fields.system) {
+                     if (umr_read_sram(start_addr, chunk_size, pdst)
< 0)
{
+                             fprintf(stderr, "[ERROR] Cannot read
system
ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+                             fprintf(stderr, "[ERROR] Alternatively
download and install /dev/fmem\n");
+                             return -1;
+                     }
+             } else {
+                     if (umr_read_vram(asic, 0xFFFF, start_addr,
chunk_size, pdst) < 0) {
+ fprintf(stderr, "[ERROR] Cannot read from
VRAM\n");
+                             return -1;
+                     }
+             }
+             pdst += chunk_size;
+             size -= chunk_size;
+             address += chunk_size;
+     }
+     return 0;
+}

 int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t
address,
uint32_t size, void *dst)
 {
@@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t
vmid, uint64_t address, uint32
               return 0;
       }

-     if (asic->family == FAMILY_VI)
-             return umr_read_vram_vi(asic, vmid, address, size, dst);
+     switch (asic->family) {
+             case FAMILY_VI:
+ return umr_read_vram_vi(asic, vmid, address, size,
dst);
+             case FAMILY_AI:
+ return umr_read_vram_ai(asic, vmid, address, size,
dst);
+             default:
+                     fprintf(stderr, "[BUG] Unsupported ASIC family
type
for umr_read_vram()\n");
+                     return -1;
+     }

       return 0;
 }
--
2.12.0

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to