Module: Mesa Branch: staging/23.2 Commit: 81e2dbc5e02f528c84998c5448c1eff1e7e04838 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=81e2dbc5e02f528c84998c5448c1eff1e7e04838
Author: Iván Briano <[email protected]> Date: Thu Jul 6 13:02:29 2023 -0700 anv: implement Wa_14019750404 Cc: 23.2 <mesa-stable> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8931 Reviewed-by: Marcin Ślusarz <[email protected]> Reviewed-by: José Roberto de Souza <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24150> (cherry picked from commit 4ad19c8310546a276ec7c68dfceb7c545ab13047) --- .pick_status.json | 2 +- src/intel/vulkan/anv_device.c | 4 +- src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/genX_cmd_buffer.c | 34 +++++++++++++++++ src/intel/vulkan/genX_pipeline.c | 77 +++++++++++++++++++++++--------------- 5 files changed, 84 insertions(+), 34 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index f5d142018c9..13c2b6428eb 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1204,7 +1204,7 @@ "description": "anv: implement Wa_14019750404", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 5f3297fef8b..2d76d5b21c0 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -196,8 +196,8 @@ get_device_extensions(const struct anv_physical_device *device, const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing; - /* We are seeing hangs on other workloads when something using mesh - * shaders runs at the same time, so it's disabled by default. + /* We are still seeing some failures with mesh and graphics pipeline + * libraries used together, so disable mesh by default. */ const bool mesh_shader_enabled = device->info.has_mesh_shading && debug_get_bool_option("ANV_MESH_SHADER", false); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7ecea5534e3..bbf81816106 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3539,6 +3539,7 @@ struct anv_graphics_pipeline { uint32_t wm[2]; uint32_t streamout_state[5]; uint32_t hs[9]; + uint32_t ds[11]; } gfx8; }; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 99dc152409e..53c55e7ff94 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3333,6 +3333,35 @@ genX(emit_hs)(struct anv_cmd_buffer *cmd_buffer) memcpy(dw, &pipeline->gfx8.hs, sizeof(pipeline->gfx8.hs)); } +ALWAYS_INLINE static void +genX(emit_ds)(struct anv_cmd_buffer *cmd_buffer) +{ +#if GFX_VERx10 >= 125 + /* Wa_14019750404: + * In any 3D enabled context, just before any Tessellation enabled draw + * call (3D Primitive), re-send the last programmed 3DSTATE_DS again. + * This will make sure that the 3DSTATE_INT generated just before the + * draw call will have TDS dirty which will make sure TDS will launch the + * state thread before the draw call. + * + * This fixes a hang resulting from running anything using tessellation + * after a switch away from the mesh pipeline. + * We don't need to track said switch, as it matters at the HW level, and + * can be triggered even across processes, so we apply the Wa at all times. + * + * FIXME: Use INTEL_NEEDS_WA_14019750404 once the tool picks it up. + */ + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + return; + + uint32_t *dw = + anv_batch_emitn(&cmd_buffer->batch, GENX(3DSTATE_DS_length), + GENX(3DSTATE_DS)); + memcpy(dw, &pipeline->gfx8.ds, sizeof(pipeline->gfx8.ds)); +#endif +} + ALWAYS_INLINE static void genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -4274,6 +4303,7 @@ void genX(CmdDraw)( #endif genX(cmd_buffer_flush_gfx_state)(cmd_buffer); + genX(emit_ds)(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, #if GFX_VER < 11 @@ -4361,6 +4391,7 @@ void genX(CmdDrawMultiEXT)( */ if (i && (INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343)) genX(emit_hs)(cmd_buffer); + genX(emit_ds)(cmd_buffer); const uint32_t count = draw->vertexCount * instanceCount; anv_measure_snapshot(cmd_buffer, @@ -4594,6 +4625,7 @@ void genX(CmdDrawMultiIndexedEXT)( */ if (i && (INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343)) genX(emit_hs)(cmd_buffer); + genX(emit_ds)(cmd_buffer); const uint32_t count = draw->indexCount * instanceCount * pipeline->instance_multiplier; @@ -4853,6 +4885,7 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer, */ if (i && (INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343)) genX(emit_hs)(cmd_buffer); + genX(emit_ds)(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, #if GFX_VER < 11 @@ -5078,6 +5111,7 @@ emit_indirect_count_draws(struct anv_cmd_buffer *cmd_buffer, */ if (i && (INTEL_NEEDS_WA_1306463417 || INTEL_NEEDS_WA_16011107343)) genX(emit_hs)(cmd_buffer); + genX(emit_ds)(cmd_buffer); anv_batch_emit(&cmd_buffer->batch, #if GFX_VER < 11 diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index d6f26a709f8..0066c02ce9d 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1317,52 +1317,67 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, hs.DispatchMode = tcs_prog_data->base.dispatch_mode; hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; + STATIC_ASSERT(ARRAY_SIZE(pipeline->gfx8.hs) == GENX(3DSTATE_HS_length)); GENX(3DSTATE_HS_pack)(&pipeline->base.base.batch, pipeline->gfx8.hs, &hs); - anv_batch_emit(batch, GENX(3DSTATE_DS), ds) { - ds.Enable = true; - ds.StatisticsEnable = true; - ds.KernelStartPointer = tes_bin->kernel.offset; - /* Wa_1606682166 */ - ds.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tes_bin); - ds.BindingTableEntryCount = tes_bin->bind_map.surface_count; - ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; + struct GENX(3DSTATE_DS) ds = { + GENX(3DSTATE_DS_header), + }; + + ds.Enable = true; + ds.StatisticsEnable = true; + ds.KernelStartPointer = tes_bin->kernel.offset; + /* Wa_1606682166 */ + ds.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tes_bin); + ds.BindingTableEntryCount = tes_bin->bind_map.surface_count; + ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; - ds.ComputeWCoordinateEnable = - tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; + ds.ComputeWCoordinateEnable = + tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; - ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length; - ds.PatchURBEntryReadOffset = 0; - ds.DispatchGRFStartRegisterForURBData = - tes_prog_data->base.base.dispatch_grf_start_reg; + ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length; + ds.PatchURBEntryReadOffset = 0; + ds.DispatchGRFStartRegisterForURBData = + tes_prog_data->base.base.dispatch_grf_start_reg; #if GFX_VER < 11 - ds.DispatchMode = - tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ? - DISPATCH_MODE_SIMD8_SINGLE_PATCH : - DISPATCH_MODE_SIMD4X2; + ds.DispatchMode = + tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ? + DISPATCH_MODE_SIMD8_SINGLE_PATCH : + DISPATCH_MODE_SIMD4X2; #else - assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8); - ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; + assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8); + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; #endif - ds.UserClipDistanceClipTestEnableBitmask = - tes_prog_data->base.clip_distance_mask; - ds.UserClipDistanceCullTestEnableBitmask = - tes_prog_data->base.cull_distance_mask; + ds.UserClipDistanceClipTestEnableBitmask = + tes_prog_data->base.clip_distance_mask; + ds.UserClipDistanceCullTestEnableBitmask = + tes_prog_data->base.cull_distance_mask; #if GFX_VER >= 12 - ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id; + ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id; #endif #if GFX_VERx10 >= 125 - ds.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); + ds.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); #else - ds.PerThreadScratchSpace = get_scratch_space(tes_bin); - ds.ScratchSpaceBasePointer = - get_scratch_address(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); + ds.PerThreadScratchSpace = get_scratch_space(tes_bin); + ds.ScratchSpaceBasePointer = + get_scratch_address(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); #endif - } + + /* Wa_14019750404: + * See genX(emit_ds)(). + * We need to both emit 3DSTATE_DS now, and before each 3DPRIMITIVE, so + * we pack it to have it later, and memcpy into the current batch. + */ + STATIC_ASSERT(ARRAY_SIZE(pipeline->gfx8.ds) == GENX(3DSTATE_DS_length)); + GENX(3DSTATE_DS_pack)(&pipeline->base.base.batch, pipeline->gfx8.ds, &ds); + + uint32_t *dw = + anv_batch_emitn(batch, GENX(3DSTATE_DS_length), GENX(3DSTATE_DS)); + memcpy(dw, &pipeline->gfx8.ds, sizeof(pipeline->gfx8.ds)); } static void
