Re: [Mesa-dev] [PATCH v2 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-20 Thread Rafael Antognolli
On Tue, Jun 19, 2018 at 02:45:28PM +0100, Lionel Landwerlin wrote:
> From: Scott D Phillips 
> 
> For gen8+, write out PPGTT tables in aub files so that full 48-bit
> addresses can be serialized.
> 
> v2: Fix handling of `end` index in map_ppgtt
> 
> Signed-off-by: Scott D Phillips 
> Signed-off-by: Lionel Landwerlin 
> Cc: Jordan Justen 
> ---
>  src/intel/tools/intel_aub.h  |   3 +-
>  src/intel/tools/intel_dump_gpu.c | 315 +++
>  2 files changed, 151 insertions(+), 167 deletions(-)
> 
> diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
> index 9ca548edaf3..2888515048f 100644
> --- a/src/intel/tools/intel_aub.h
> +++ b/src/intel/tools/intel_aub.h
> @@ -117,7 +117,8 @@
>  /* DW3 */
>  
>  #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK        0xf0000000
> -#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL       (1 << 28)
> +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT        (0 << 28)
> +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL    (2 << 28)
>  #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
>  
>  /**
> diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
> index 86c133da433..bfff481ba5e 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -51,6 +51,8 @@
>  #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
>  #define MI_LRI_FORCE_POSTED   (1<<12)
>  
> +#define MI_BATCH_NON_SECURE_I965 (1 << 8)
> +
>  #define MI_BATCH_BUFFER_END (0xA << 23)
>  
>  #define min(a, b) ({\
> @@ -59,6 +61,12 @@
>   _a < _b ? _a : _b; \
>})
>  
> +#define max(a, b) ({\
> + __typeof(a) _a = (a);  \
> + __typeof(b) _b = (b);  \
> + _a > _b ? _a : _b; \
> +  })
> +
>  #define HWS_PGA_RCSUNIT  0x02080
>  #define HWS_PGA_VCSUNIT0   0x12080
>  #define HWS_PGA_BCSUNIT  0x22080
> @@ -93,8 +101,12 @@
>  
>  #define RING_SIZE (1 * 4096)
>  #define PPHWSP_SIZE (1 * 4096)
> -#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
> -#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
> +#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
> +#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
> +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
> +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
> +#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
> +
>  
>  #define STATIC_GGTT_MAP_START 0
>  
> @@ -110,14 +122,19 @@
>  #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
>  #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
>  
> -#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
> -   Legacy Context with no 64 bit VA support | Valid */
> +#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
> +
> +#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
> + * PPGTT Enabled |
> + * Legacy Context with 64 bit VA support |
> + * Valid
> + */
>  
> -#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  | CONTEXT_FLAGS)
> -#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
> -#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   | CONTEXT_FLAGS)
> +#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | CONTEXT_FLAGS)
> +#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
> +#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | CONTEXT_FLAGS)
>  
> -static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
> +static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* Choose the largest */
>sizeof(uint32_t)] = {
> 0 /* MI_NOOP */,
> MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED,
> @@ -147,8 +164,8 @@ static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
> 0x2280 /* PDP2_LDW */,  0,
> 0x227C /* PDP1_UDW */,  0,
> 0x2278 /* PDP1_LDW */,  0,
> -   0x2274 /* PDP0_UDW */,  0,
> -   0x2270 /* PDP0_LDW */,  0,
> +   0x2274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
> +   0x2270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
> /* MI_NOOP */
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
>  
> @@ -185,8 +202,8 @@ static const uint32_t blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
> 0x22280 /* PDP2_LDW */,  0,
> 0x2227C /* PDP1_UDW */,  0,
> 0x22278 /* PDP1_LDW */,  0,
> -   0x22274 /* PDP0_UDW */,  0,
> -   0x22270 /* PDP0_LDW */,  0,
> +   0x22274 /* 

[Mesa-dev] [PATCH v2 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-19 Thread Lionel Landwerlin
From: Scott D Phillips 

For gen8+, write out PPGTT tables in aub files so that full 48-bit
addresses can be serialized.

v2: Fix handling of `end` index in map_ppgtt

Signed-off-by: Scott D Phillips 
Signed-off-by: Lionel Landwerlin 
Cc: Jordan Justen 
---
 src/intel/tools/intel_aub.h  |   3 +-
 src/intel/tools/intel_dump_gpu.c | 315 +++
 2 files changed, 151 insertions(+), 167 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 9ca548edaf3..2888515048f 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -117,7 +117,8 @@
 /* DW3 */
 
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK        0xf0000000
-#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL       (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT        (0 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL    (2 << 28)
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
 
 /**
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 86c133da433..bfff481ba5e 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -51,6 +51,8 @@
 #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
 #define MI_LRI_FORCE_POSTED   (1<<12)
 
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
 #define MI_BATCH_BUFFER_END (0xA << 23)
 
 #define min(a, b) ({\
@@ -59,6 +61,12 @@
  _a < _b ? _a : _b; \
   })
 
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
 #define HWS_PGA_RCSUNIT  0x02080
 #define HWS_PGA_VCSUNIT0   0x12080
 #define HWS_PGA_BCSUNIT  0x22080
@@ -93,8 +101,12 @@
 
 #define RING_SIZE (1 * 4096)
 #define PPHWSP_SIZE (1 * 4096)
-#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
-#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
 
 #define STATIC_GGTT_MAP_START 0
 
@@ -110,14 +122,19 @@
 #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE)
 #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
 
-#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
-   Legacy Context with no 64 bit VA support | Valid */
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+ * Legacy Context with 64 bit VA support |
+ * Valid
+ */
 
-#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  | CONTEXT_FLAGS)
-#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
-#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   | CONTEXT_FLAGS)
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | CONTEXT_FLAGS)
 
-static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
+static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* Choose the largest */
   sizeof(uint32_t)] = {
0 /* MI_NOOP */,
MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED,
@@ -147,8 +164,8 @@ static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
0x2280 /* PDP2_LDW */,  0,
0x227C /* PDP1_UDW */,  0,
0x2278 /* PDP1_LDW */,  0,
-   0x2274 /* PDP0_UDW */,  0,
-   0x2270 /* PDP0_LDW */,  0,
+   0x2274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x2270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -185,8 +202,8 @@ static const uint32_t blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x22280 /* PDP2_LDW */,  0,
0x2227C /* PDP1_UDW */,  0,
0x22278 /* PDP1_LDW */,  0,
-   0x22274 /* PDP0_UDW */,  0,
-   0x22270 /* PDP0_LDW */,  0,
+   0x22274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x22270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -220,8 +237,8 @@ static const uint32_t video_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x1C280 /* PDP2_LDW */,  0,
0x1C27C /* PDP1_UDW */,  0,