Re: [Mesa-dev] [PATCH v3 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-22 Thread Rafael Antognolli
On Thu, Jun 21, 2018 at 05:29:12PM +0100, Lionel Landwerlin wrote:
> From: Scott D Phillips 
> 
> For gen8+, write out PPGTT tables in aub files so that full 48-bit
> addresses can be serialized.

I don't fully understand how things worked before this patch in the
GEN < 10 case. It looks to me like we would set up a GGTT mapping of only
64MiB of memory, but that wouldn't make much sense. So I also don't know
how things work in the legacy path, even after this patch.

For the execlists case, things make more sense to me, so I'll add some
comments that imho we could add to help explain this patch. Assuming
those comments make sense and are correct, this patch is

Reviewed-by: Rafael Antognolli 

> 
> v2: Fix handling of `end` index in map_ppgtt
> 
> v3: Correctly mark GGTT entry as present (Rafael)
> 
> Signed-off-by: Scott D Phillips 
> Signed-off-by: Lionel Landwerlin 
> Cc: Jordan Justen 
> ---
>  src/intel/tools/intel_aub.h  |   3 +-
>  src/intel/tools/intel_dump_gpu.c | 315 +++
>  2 files changed, 151 insertions(+), 167 deletions(-)
> 
> diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
> index 9ca548edaf3..2888515048f 100644
> --- a/src/intel/tools/intel_aub.h
> +++ b/src/intel/tools/intel_aub.h
> @@ -117,7 +117,8 @@
>  /* DW3 */
>  
>  #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK  0xf0000000
> -#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL (1 << 28)

Add some comments here:

/**
 * Address spaces:
 *   - GGTT: virtual addresses written through GGTT that need to be
 * translated to physical addresses
 *   - PHYSICAL: physical addresses, no GTT translation needed.
 *   - GGTT_ENTRY: adds an entry to the GGTT.
 *
 * Note that there's no PPGTT address space, because PPGTT virtual
 * addresses get translated and written as physical addresses.
 */
> +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT  (0 << 28)
> +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL  (2 << 28)
>  #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY (4 << 28)
>  
>  /**
> diff --git a/src/intel/tools/intel_dump_gpu.c 
> b/src/intel/tools/intel_dump_gpu.c
> index 86c133da433..a9ce109b2b6 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -51,6 +51,8 @@
>  #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
>  #define MI_LRI_FORCE_POSTED   (1<<12)
>  
> +#define MI_BATCH_NON_SECURE_I965 (1 << 8)
> +
>  #define MI_BATCH_BUFFER_END (0xA << 23)
>  
>  #define min(a, b) ({\
> @@ -59,6 +61,12 @@
>   _a < _b ? _a : _b; \
>})
>  
> +#define max(a, b) ({\
> + __typeof(a) _a = (a);  \
> + __typeof(b) _b = (b);  \
> + _a > _b ? _a : _b; \
> +  })
> +
>  #define HWS_PGA_RCSUNIT  0x02080
>  #define HWS_PGA_VCSUNIT0   0x12080
>  #define HWS_PGA_BCSUNIT  0x22080
> @@ -93,8 +101,12 @@
>  
>  #define RING_SIZE (1 * 4096)
>  #define PPHWSP_SIZE (1 * 4096)
> -#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
> -#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
> +#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
> +#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
> +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
> +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
> +#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
> +
>  
>  #define STATIC_GGTT_MAP_START 0
>  
> @@ -110,14 +122,19 @@
>  #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
> GEN8_LR_CONTEXT_OTHER_SIZE)
>  #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
>  
> -#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
> -   Legacy Context with no 64 bit VA support 
> | Valid */
> +#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
> +
> +#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
> + * PPGTT Enabled |
> + * Legacy Context with 64 bit VA support |
> + * Valid
> + */
>  
> -#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  
> | CONTEXT_FLAGS)
> -#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR 
> | CONTEXT_FLAGS)
> -#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   
> | CONTEXT_FLAGS)
> +#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  
> | CONTEXT_FLAGS)
> +#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR 
> | CONTEXT_FLAGS)
> +#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   
> | CONTEXT_FLAGS)
>  
> -static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
> +static const uint32_t 

[Mesa-dev] [PATCH v3 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

For gen8+, write out PPGTT tables in aub files so that full 48-bit
addresses can be serialized.

v2: Fix handling of `end` index in map_ppgtt

v3: Correctly mark GGTT entry as present (Rafael)

Signed-off-by: Scott D Phillips 
Signed-off-by: Lionel Landwerlin 
Cc: Jordan Justen 
---
 src/intel/tools/intel_aub.h  |   3 +-
 src/intel/tools/intel_dump_gpu.c | 315 +++
 2 files changed, 151 insertions(+), 167 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 9ca548edaf3..2888515048f 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -117,7 +117,8 @@
 /* DW3 */
 
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK        0xf0000000
-#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL       (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT        (0 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL    (2 << 28)
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
 
 /**
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 86c133da433..a9ce109b2b6 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -51,6 +51,8 @@
 #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
 #define MI_LRI_FORCE_POSTED   (1<<12)
 
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
 #define MI_BATCH_BUFFER_END (0xA << 23)
 
 #define min(a, b) ({\
@@ -59,6 +61,12 @@
  _a < _b ? _a : _b; \
   })
 
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
 #define HWS_PGA_RCSUNIT  0x02080
 #define HWS_PGA_VCSUNIT0   0x12080
 #define HWS_PGA_BCSUNIT  0x22080
@@ -93,8 +101,12 @@
 
 #define RING_SIZE (1 * 4096)
 #define PPHWSP_SIZE (1 * 4096)
-#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
-#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
 
 #define STATIC_GGTT_MAP_START 0
 
@@ -110,14 +122,19 @@
 #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
 #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
 
-#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
-   Legacy Context with no 64 bit VA support | 
Valid */
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+ * Legacy Context with 64 bit VA support |
+ * Valid
+ */
 
-#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
-#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
-#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
 
-static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
+static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* 
Choose the largest */
   sizeof(uint32_t)] = {
0 /* MI_NOOP */,
MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED,
@@ -147,8 +164,8 @@ static const uint32_t 
render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
0x2280 /* PDP2_LDW */,  0,
0x227C /* PDP1_UDW */,  0,
0x2278 /* PDP1_LDW */,  0,
-   0x2274 /* PDP0_UDW */,  0,
-   0x2270 /* PDP0_LDW */,  0,
+   0x2274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x2270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -185,8 +202,8 @@ static const uint32_t 
blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x22280 /* PDP2_LDW */,  0,
0x2227C /* PDP1_UDW */,  0,
0x22278 /* PDP1_LDW */,  0,
-   0x22274 /* PDP0_UDW */,  0,
-   0x22270 /* PDP0_LDW */,  0,
+   0x22274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x22270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -220,8 +237,8 @@ static const uint32_t 
video_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x1C280 /* PDP2_LDW */,