Re: [Mesa-dev] [PATCH 3/3] radv: implement a fast prefetch path for the vertex stage

2018-04-04 Thread Dieter Nützel

For the series:

Tested-by: Dieter Nützel 

on Polaris 20 / RX580
with F1 2017, smoketest

Dieter

Am 04.04.2018 12:12, schrieb Samuel Pitoiset:

This allows draws to start as soon as possible.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 40 ++++++++++++++++++++++++++++++----------

 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
b/src/amd/vulkan/radv_cmd_buffer.c

index 393d9ba4f4..0e59f99799 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -640,39 +640,48 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer
*cmd_buffer,

 static void
 radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+ struct radv_pipeline *pipeline,
+ bool vertex_stage_only)
 {
struct radv_cmd_state *state = &cmd_buffer->state;
+   uint32_t mask = state->prefetch_L2_mask;

if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
return;

-   if (state->prefetch_L2_mask & RADV_PREFETCH_VS)
+   if (vertex_stage_only) {
+   /* Fast prefetch path for starting draws as soon as possible.
+*/
+   mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
+ 
RADV_PREFETCH_VBO_DESCRIPTORS);
+   }
+
+   if (mask & RADV_PREFETCH_VS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_VERTEX]);

-   if (state->prefetch_L2_mask & RADV_PREFETCH_VBO_DESCRIPTORS)
+   if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);

-   if (state->prefetch_L2_mask & RADV_PREFETCH_TCS)
+   if (mask & RADV_PREFETCH_TCS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_TESS_CTRL]);

-   if (state->prefetch_L2_mask & RADV_PREFETCH_TES)
+   if (mask & RADV_PREFETCH_TES)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_TESS_EVAL]);

-   if (state->prefetch_L2_mask & RADV_PREFETCH_GS) {
+   if (mask & RADV_PREFETCH_GS) {
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_GEOMETRY]);
radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
}

-   if (state->prefetch_L2_mask & RADV_PREFETCH_PS)
+   if (mask & RADV_PREFETCH_PS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_FRAGMENT]);

-   state->prefetch_L2_mask = 0;
+   state->prefetch_L2_mask &= ~mask;
 }

 static void
@@ -3042,7 +3051,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 */
if (cmd_buffer->state.prefetch_L2_mask) {
radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline);
+ cmd_buffer->state.pipeline, 
false);
}
} else {
/* If we don't wait for idle, start prefetches first, then set
@@ -3051,8 +3060,11 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
si_emit_cache_flush(cmd_buffer);

if (cmd_buffer->state.prefetch_L2_mask) {
+   /* Only prefetch the vertex shader and VBO descriptors
+* in order to start the draw as soon as possible.
+*/
radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline);
+ cmd_buffer->state.pipeline, true);
}

 		if (!radv_upload_graphics_shader_descriptors(cmd_buffer, 
pipeline_is_dirty))

@@ -3060,6 +3072,14 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,

radv_emit_all_graphics_states(cmd_buffer, info);
radv_emit_draw_packets(cmd_buffer, info);
+
+   /* Prefetch the remaining shaders after the draw has been
+* started.
+*/
+   if (cmd_buffer->state.prefetch_L2_mask) {
+   radv_emit_prefetch_L2(cmd_buffer,
+ cmd_buffer->state.pipeline, 
false);
+   }
}

assert(cmd_buffer->cs->cdw <= cdw_max);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radv: implement a fast prefetch path for the vertex stage

2018-04-04 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Wed, Apr 4, 2018 at 12:12 PM, Samuel Pitoiset
 wrote:
> This allows draws to start as soon as possible.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 40 ++++++++++++++++++++++++++++++----------
>  1 file changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 393d9ba4f4..0e59f99799 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -640,39 +640,48 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer 
> *cmd_buffer,
>
>  static void
>  radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
> - struct radv_pipeline *pipeline)
> + struct radv_pipeline *pipeline,
> + bool vertex_stage_only)
>  {
> struct radv_cmd_state *state = &cmd_buffer->state;
> +   uint32_t mask = state->prefetch_L2_mask;
>
> if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
> return;
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_VS)
> +   if (vertex_stage_only) {
> +   /* Fast prefetch path for starting draws as soon as possible.
> +*/
> +   mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
> + 
> RADV_PREFETCH_VBO_DESCRIPTORS);
> +   }
> +
> +   if (mask & RADV_PREFETCH_VS)
> radv_emit_shader_prefetch(cmd_buffer,
>   
> pipeline->shaders[MESA_SHADER_VERTEX]);
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_VBO_DESCRIPTORS)
> +   if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
> si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_TCS)
> +   if (mask & RADV_PREFETCH_TCS)
> radv_emit_shader_prefetch(cmd_buffer,
>   
> pipeline->shaders[MESA_SHADER_TESS_CTRL]);
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_TES)
> +   if (mask & RADV_PREFETCH_TES)
> radv_emit_shader_prefetch(cmd_buffer,
>   
> pipeline->shaders[MESA_SHADER_TESS_EVAL]);
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_GS) {
> +   if (mask & RADV_PREFETCH_GS) {
> radv_emit_shader_prefetch(cmd_buffer,
>   
> pipeline->shaders[MESA_SHADER_GEOMETRY]);
> radv_emit_shader_prefetch(cmd_buffer, 
> pipeline->gs_copy_shader);
> }
>
> -   if (state->prefetch_L2_mask & RADV_PREFETCH_PS)
> +   if (mask & RADV_PREFETCH_PS)
> radv_emit_shader_prefetch(cmd_buffer,
>   
> pipeline->shaders[MESA_SHADER_FRAGMENT]);
>
> -   state->prefetch_L2_mask = 0;
> +   state->prefetch_L2_mask &= ~mask;
>  }
>
>  static void
> @@ -3042,7 +3051,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
>  */
> if (cmd_buffer->state.prefetch_L2_mask) {
> radv_emit_prefetch_L2(cmd_buffer,
> - cmd_buffer->state.pipeline);
> + cmd_buffer->state.pipeline, 
> false);
> }
> } else {
> /* If we don't wait for idle, start prefetches first, then set
> @@ -3051,8 +3060,11 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
> si_emit_cache_flush(cmd_buffer);
>
> if (cmd_buffer->state.prefetch_L2_mask) {
> +   /* Only prefetch the vertex shader and VBO descriptors
> +* in order to start the draw as soon as possible.
> +*/
> radv_emit_prefetch_L2(cmd_buffer,
> - cmd_buffer->state.pipeline);
> + cmd_buffer->state.pipeline, 
> true);
> }
>
> if (!radv_upload_graphics_shader_descriptors(cmd_buffer, 
> pipeline_is_dirty))
> @@ -3060,6 +3072,14 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
>
> radv_emit_all_graphics_states(cmd_buffer, info);
> radv_emit_draw_packets(cmd_buffer, info);
> +
> +   /* Prefetch the remaining shaders after the draw has been
> +* started.
> +*/
> +   if (cmd_buffer->state.prefetch_L2_mask) {
> +   radv_emit_prefetch_L2(cmd_buffer,
> + cmd_buffer->state.pipeline, 
> false);
> +   }
> }
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> --
> 2.16.3
>

[Mesa-dev] [PATCH 3/3] radv: implement a fast prefetch path for the vertex stage

2018-04-04 Thread Samuel Pitoiset
This allows draws to start as soon as possible.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 393d9ba4f4..0e59f99799 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -640,39 +640,48 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer 
*cmd_buffer,
 
 static void
 radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+ struct radv_pipeline *pipeline,
+ bool vertex_stage_only)
 {
struct radv_cmd_state *state = &cmd_buffer->state;
+   uint32_t mask = state->prefetch_L2_mask;
 
if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
return;
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_VS)
+   if (vertex_stage_only) {
+   /* Fast prefetch path for starting draws as soon as possible.
+*/
+   mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
+ 
RADV_PREFETCH_VBO_DESCRIPTORS);
+   }
+
+   if (mask & RADV_PREFETCH_VS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_VERTEX]);
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_VBO_DESCRIPTORS)
+   if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_TCS)
+   if (mask & RADV_PREFETCH_TCS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_TESS_CTRL]);
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_TES)
+   if (mask & RADV_PREFETCH_TES)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_TESS_EVAL]);
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_GS) {
+   if (mask & RADV_PREFETCH_GS) {
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_GEOMETRY]);
radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
}
 
-   if (state->prefetch_L2_mask & RADV_PREFETCH_PS)
+   if (mask & RADV_PREFETCH_PS)
radv_emit_shader_prefetch(cmd_buffer,
  
pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
-   state->prefetch_L2_mask = 0;
+   state->prefetch_L2_mask &= ~mask;
 }
 
 static void
@@ -3042,7 +3051,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 */
if (cmd_buffer->state.prefetch_L2_mask) {
radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline);
+ cmd_buffer->state.pipeline, 
false);
}
} else {
/* If we don't wait for idle, start prefetches first, then set
@@ -3051,8 +3060,11 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
si_emit_cache_flush(cmd_buffer);
 
if (cmd_buffer->state.prefetch_L2_mask) {
+   /* Only prefetch the vertex shader and VBO descriptors
+* in order to start the draw as soon as possible.
+*/
radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline);
+ cmd_buffer->state.pipeline, true);
}
 
if (!radv_upload_graphics_shader_descriptors(cmd_buffer, 
pipeline_is_dirty))
@@ -3060,6 +3072,14 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 
radv_emit_all_graphics_states(cmd_buffer, info);
radv_emit_draw_packets(cmd_buffer, info);
+
+   /* Prefetch the remaining shaders after the draw has been
+* started.
+*/
+   if (cmd_buffer->state.prefetch_L2_mask) {
+   radv_emit_prefetch_L2(cmd_buffer,
+ cmd_buffer->state.pipeline, 
false);
+   }
}
 
assert(cmd_buffer->cs->cdw <= cdw_max);
-- 
2.16.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev