Module: Mesa Branch: main Commit: 40416850f186615d4d2cce95323137262a6789ba URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=40416850f186615d4d2cce95323137262a6789ba
Author: Caio Oliveira <caio.olive...@intel.com> Date: Tue Oct 31 20:45:31 2023 -0700 intel/compiler: Re-enable opt_zero_samples() in many cases for Gfx12.5 The workaround applies specifically to Cube and Cube Arrays, so we can still apply the optimization for the others. Ideally we would like to pull opt_zero_samples logic into the lowering sends -- to avoid adding a bit to communicate between passes. However the texture coordinates for the LOGICAL backend instructions, which are a common target for the optimization, are combined into offsets over a single VGRF, so we can't easily identify the constant cases. The copy-prop pass makes this more visible for opt_zero_samples. Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25742> --- src/intel/compiler/brw_fs.cpp | 15 ++++++++------- src/intel/compiler/brw_fs_nir.cpp | 24 ++++++++++++++++-------- src/intel/compiler/brw_ir_fs.h | 1 + 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index ec9d3a9b15d..f46f394658b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3108,13 +3108,6 @@ fs_visitor::opt_zero_samples() /* Implementation supports only SENDs, so applicable to Gfx7+ only. */ assert(devinfo->ver >= 7); - /* Gfx12.5 has restrictions on the number of coordinate - * parameters that have to be provided for some texture types - * (Wa_14012688258). - */ - if (intel_needs_workaround(devinfo, 14012688258)) - return false; - bool progress = false; foreach_block_and_inst(block, fs_inst, send, cfg) { @@ -3122,6 +3115,14 @@ fs_visitor::opt_zero_samples() send->sfid != BRW_SFID_SAMPLER) continue; + /* Wa_14012688258: + * + * Don't trim zeros at the end of payload for sample operations + * in cube and cube arrays. + */ + if (send->keep_payload_trailing_zeros) + continue; + /* This pass works on SENDs before splitting. 
*/ if (send->ex_mlen > 0) continue; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index add1f62afae..01157de9a74 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -6474,14 +6474,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_REGISTER_TYPE_F); break; } - - /* Wa_14012688258: - * - * Compiler should send U,V,R parameters even if V,R are 0. - */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - intel_needs_workaround(devinfo, 14012688258)) - assert(instr->coord_components >= 3u); break; case nir_tex_src_ddx: srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F); @@ -6723,6 +6715,22 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE) inst->shadow_compare = true; + /* Wa_14012688258: + * + * Don't trim zeros at the end of payload for sample operations + * in cube and cube arrays. + */ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + intel_needs_workaround(devinfo, 14012688258)) { + + /* Compiler should send U,V,R parameters even if V,R are 0. */ + if (srcs[TEX_LOGICAL_SRC_COORDINATE].file != BAD_FILE) + assert(instr->coord_components >= 3u); + + /* See opt_zero_samples(). */ + inst->keep_payload_trailing_zeros = true; + } + fs_reg nir_dest[5]; for (unsigned i = 0; i < dest_size; i++) nir_dest[i] = offset(dst, bld, i); diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index c7215caf45f..7ef815a7601 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -426,6 +426,7 @@ public: bool last_rt:1; bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ + bool keep_payload_trailing_zeros; tgl_swsb sched; /**< Scheduling info. */ };