Module: Mesa Branch: main Commit: de6653dc5d0bd5269772648c0f5edabf74a3877f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=de6653dc5d0bd5269772648c0f5edabf74a3877f
Author: Rohan Garg <rohan.g...@intel.com> Date: Mon Apr 11 13:33:07 2022 +0300 anv: WA 16014538804 for DG2, MTL A0 Send empty/dummy PIPE_CONTROL after every third 3DPRIMITIVE command. Signed-off-by: Rohan Garg <rohan.g...@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25039> --- src/intel/vulkan/anv_genX.h | 9 ++- src/intel/vulkan/anv_private.h | 5 ++ src/intel/vulkan/genX_cmd_buffer.c | 143 ++++++++++++++++++++++------------ src/intel/vulkan/genX_gpu_memcpy.c | 10 +-- src/intel/vulkan/genX_simple_shader.c | 2 +- 5 files changed, 109 insertions(+), 60 deletions(-) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 06de5052bea..a92b1f207ef 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -179,10 +179,11 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, enum anv_timestamp_capture_type type, void *data); -void genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, - struct anv_device *device, - uint32_t primitive_topology, - uint32_t vertex_count); +void +genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch, + const struct anv_device *device, + uint32_t primitive_topology, + uint32_t vertex_count); VkPolygonMode genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 35925ca989a..2841f3dcfa7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1965,6 +1965,11 @@ struct anv_batch { VkResult status; enum intel_engine_class engine_class; + + /** + * Number of 3DPRIMITIVE's emitted for WA 16014538804 + */ + uint8_t num_3d_primitives_emitted; }; void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9a8ac921304..6ca022fdca9 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ 
b/src/intel/vulkan/genX_cmd_buffer.c @@ -87,15 +87,6 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) { fprintf(stdout, ") reason: %s\n", reason); \ } -ALWAYS_INLINE static void -genX(emit_dummy_post_sync_op)(struct anv_cmd_buffer *cmd_buffer, - uint32_t vertex_count) -{ - genX(batch_emit_dummy_post_sync_op)(&cmd_buffer->batch, cmd_buffer->device, - cmd_buffer->state.gfx.primitive_topology, - vertex_count); -} - void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) { @@ -3595,6 +3586,8 @@ genX(CmdExecuteCommands)( { ANV_FROM_HANDLE(anv_cmd_buffer, container, commandBuffer); + struct anv_device *device = container->device; + if (anv_batch_has_error(&container->batch)) return; @@ -3642,7 +3635,7 @@ genX(CmdExecuteCommands)( * regardless of conditional rendering being enabled in container. */ struct mi_builder b; - mi_builder_init(&b, container->device->info, &container->batch); + mi_builder_init(&b, device->info, &container->batch); mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG), mi_imm(UINT64_MAX)); } @@ -3660,9 +3653,9 @@ genX(CmdExecuteCommands)( genX(cmd_buffer_so_memcpy)( container, - anv_state_pool_state_address(&container->device->internal_surface_state_pool, + anv_state_pool_state_address(&device->internal_surface_state_pool, dst_state), - anv_state_pool_state_address(&container->device->internal_surface_state_pool, + anv_state_pool_state_address(&device->internal_surface_state_pool, src_state), src_state.alloc_size); } @@ -3704,6 +3697,11 @@ genX(CmdExecuteCommands)( "Secondary cmd buffer not tracked in VF cache"); } +#if INTEL_NEEDS_WA_16014538804 + if (intel_needs_workaround(device->info, 16014538804)) + anv_batch_emit(&container->batch, GENX(PIPE_CONTROL), pc); +#endif + /* The secondary may have selected a different pipeline (3D or compute) and * may have changed the current L3$ configuration. 
Reset our tracking * variables to invalid values to ensure that we re-emit these in the case @@ -3716,7 +3714,7 @@ genX(CmdExecuteCommands)( container->state.gfx.push_constant_stages = 0; container->state.gfx.ds_write_state = false; memcpy(container->state.gfx.dyn_state.dirty, - container->device->gfx_dirty_state, + device->gfx_dirty_state, sizeof(container->state.gfx.dyn_state.dirty)); /* Each of the secondary command buffers will use its own state base @@ -3729,7 +3727,6 @@ genX(CmdExecuteCommands)( genX(cmd_buffer_emit_state_base_address)(container); /* Copy of utrace timestamp buffers from secondary into container */ - struct anv_device *device = container->device; if (u_trace_enabled(&device->ds.trace_context)) { trace_intel_begin_trace_copy(&container->trace); @@ -4246,12 +4243,12 @@ void genX(CmdDraw)( #endif } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + vertexCount); genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, vertexCount); -#endif - update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); trace_intel_end_draw(&cmd_buffer->trace, count); @@ -4304,6 +4301,12 @@ void genX(CmdDrawMultiEXT)( prim.BaseVertexLocation = 0; } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 0 : + pVertexInfo[drawCount - 1].vertexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_multi(&cmd_buffer->trace, count); } @@ -4342,16 +4345,17 @@ void genX(CmdDrawMultiEXT)( prim.ExtendedParameter2 = i; } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 
0 : + pVertexInfo[drawCount - 1].vertexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_multi(&cmd_buffer->trace, count); } #endif -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, - drawCount == 0 ? 0 : - pVertexInfo[drawCount - 1].vertexCount); -#endif update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); } @@ -4416,12 +4420,12 @@ void genX(CmdDrawIndexed)( #endif } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + indexCount); genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, indexCount); -#endif - update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); trace_intel_end_draw_indexed(&cmd_buffer->trace, count); @@ -4489,6 +4493,13 @@ void genX(CmdDrawMultiIndexedEXT)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = *pVertexOffset; } + + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 0 : + pIndexInfo[drawCount - 1].indexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count); @@ -4524,6 +4535,13 @@ void genX(CmdDrawMultiIndexedEXT)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = *pVertexOffset; } + + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 
0 : + pIndexInfo[drawCount - 1].indexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count); @@ -4554,6 +4572,13 @@ void genX(CmdDrawMultiIndexedEXT)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = draw->vertexOffset; } + + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 0 : + pIndexInfo[drawCount - 1].indexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count); } @@ -4594,16 +4619,18 @@ void genX(CmdDrawMultiIndexedEXT)( prim.ExtendedParameter1 = firstInstance; prim.ExtendedParameter2 = i; } + + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + drawCount == 0 ? 0 : + pIndexInfo[drawCount - 1].indexCount); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); trace_intel_end_draw_indexed_multi(&cmd_buffer->trace, count); } #endif -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, - drawCount == 0 ? 
0 : - pIndexInfo[drawCount - 1].indexCount); -#endif update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); } @@ -4717,10 +4744,11 @@ void genX(CmdDrawIndirectByteCountEXT)( #endif } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + 1); genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, 1); -#endif update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); @@ -4849,10 +4877,12 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer, #endif } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + 1); + genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, 1); -#endif update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL); @@ -5076,10 +5106,11 @@ emit_indirect_count_draws(struct anv_cmd_buffer *cmd_buffer, #endif } + genX(batch_emit_post_3dprimitive_was)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.gfx.primitive_topology, + 1); genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false); -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, 1); -#endif update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); } @@ -8189,15 +8220,14 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, } void -genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, - struct anv_device *device, - uint32_t primitive_topology, - uint32_t vertex_count) -{ - if (!intel_needs_workaround(device->info, 22014412737)) - return; - - if ((primitive_topology == _3DPRIM_POINTLIST || +genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch, + const struct anv_device *device, + uint32_t primitive_topology, + uint32_t vertex_count) +{ +#if INTEL_NEEDS_WA_22014412737 || INTEL_NEEDS_WA_16014538804 + if 
(intel_needs_workaround(device->info, 22014412737) && + (primitive_topology == _3DPRIM_POINTLIST || primitive_topology == _3DPRIM_LINELIST || primitive_topology == _3DPRIM_LINESTRIP || primitive_topology == _3DPRIM_LINELIST_ADJ || @@ -8212,7 +8242,20 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, (batch, device->info, 0, WriteImmediateData, device->workaround_address, 0, 0); + /* Reset counter because we just emitted a PC */ + batch->num_3d_primitives_emitted = 0; + } else if (intel_needs_workaround(device->info, 16014538804)) { + batch->num_3d_primitives_emitted++; + /* WA 16014538804: + * After every 3 3D_Primitive commands, + * at least 1 pipe_control must be inserted. + */ + if (batch->num_3d_primitives_emitted == 3) { + anv_batch_emit(batch, GENX(PIPE_CONTROL), pc); + batch->num_3d_primitives_emitted = 0; + } } +#endif } struct anv_state diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 2f4e3332f17..742aea1fa83 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -236,12 +236,12 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, prim.StartInstanceLocation = 0; prim.BaseVertexLocation = 0; } - genX(emit_breakpoint)(batch, device, false); -#if GFX_VERx10 == 125 - genX(batch_emit_dummy_post_sync_op)(batch, device, _3DPRIM_POINTLIST, - size / bs); -#endif + genX(batch_emit_post_3dprimitive_was)(batch, + device, + _3DPRIM_POINTLIST, size / bs); + + genX(emit_breakpoint)(batch, device, false); } void diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 8d609ed6026..ab09259050e 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -524,8 +524,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state, prim.VertexCountPerInstance = 3; prim.InstanceCount = 1; } + genX(batch_emit_post_3dprimitive_was)(batch, device, _3DPRIM_RECTLIST, 3); 
genX(emit_breakpoint)(batch, device, false); - genX(batch_emit_dummy_post_sync_op)(batch, device, _3DPRIM_RECTLIST, 3); } else { const struct intel_device_info *devinfo = device->info; const struct brw_cs_prog_data *prog_data =