Module: Mesa Branch: staging/23.0 Commit: 6dfc7eaeb2be6c6302ee2719e1dc17e6178749f3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6dfc7eaeb2be6c6302ee2719e1dc17e6178749f3
Author: Marcin Ślusarz <[email protected]> Date: Mon Jan 30 17:09:26 2023 +0100 intel/compiler/mesh: use slice id of task urb handles in mesh shaders When mesh shader is spawned on a different slice than the originating task shader, then input task urb handle can come from a different slice, so masking this information off will load data from the current slice, instead of the one where real data are. Reviewed-by: Caio Oliveira <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21007> (cherry picked from commit dd9bf86725bac3123902d6a85643e6ca567eff56) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs.h | 3 ++- src/intel/compiler/brw_mesh.cpp | 11 ++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 6d4b0486e9f..2f32b276534 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3541,7 +3541,7 @@ "description": "intel/compiler/mesh: use slice id of task urb handles in mesh shaders", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d42d9ef901d..7f39ba51241 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -418,7 +418,8 @@ public: const fs_reg &urb_handle); void emit_task_mesh_load(const brw::fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &urb_handle); + const fs_reg &urb_handle, + bool mask); void emit_barrier(); void emit_tcs_barrier(); diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 3ea6bfab1c9..0f5a0e185c7 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -1197,7 +1197,7 @@ fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *ins void fs_visitor::emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *instr, - const fs_reg &urb_handle) + const fs_reg &urb_handle, 
bool mask) { fs_reg dest = get_nir_dest(instr->dest); nir_src *offset_nir_src = nir_get_io_offset_src(instr); @@ -1205,7 +1205,8 @@ fs_visitor::emit_task_mesh_load(const fs_builder &bld, nir_intrinsic_instr *inst fs_builder ubld8 = bld.group(8, 0).exec_all(); fs_reg h = ubld8.vgrf(BRW_REGISTER_TYPE_UD, 1); ubld8.MOV(h, urb_handle); - ubld8.AND(h, h, brw_imm_ud(0xFFFF)); + if (mask) + ubld8.AND(h, h, brw_imm_ud(0xFFFF)); /* TODO(mesh): for per_vertex and per_primitive, if we could keep around * the non-array-index offset, we could use to decide if we can perform @@ -1233,7 +1234,7 @@ fs_visitor::nir_emit_task_intrinsic(const fs_builder &bld, case nir_intrinsic_load_output: case nir_intrinsic_load_task_payload: - emit_task_mesh_load(bld, instr, payload.urb_output); + emit_task_mesh_load(bld, instr, payload.urb_output, true); break; default: @@ -1259,11 +1260,11 @@ fs_visitor::nir_emit_mesh_intrinsic(const fs_builder &bld, case nir_intrinsic_load_per_vertex_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_load_output: - emit_task_mesh_load(bld, instr, payload.urb_output); + emit_task_mesh_load(bld, instr, payload.urb_output, true); break; case nir_intrinsic_load_task_payload: - emit_task_mesh_load(bld, instr, payload.task_urb_input); + emit_task_mesh_load(bld, instr, payload.task_urb_input, false); break; default:
