Module: Mesa
Branch: main
Commit: 40416850f186615d4d2cce95323137262a6789ba
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=40416850f186615d4d2cce95323137262a6789ba

Author: Caio Oliveira <caio.olive...@intel.com>
Date:   Tue Oct 31 20:45:31 2023 -0700

intel/compiler: Re-enable opt_zero_samples() in many cases for Gfx12.5

The workaround applies specifically to Cube and Cube Arrays, so we can
still apply the optimization for the others.

Ideally we would like to pull the opt_zero_samples logic into the lowering
of sends -- to avoid adding a bit to communicate between passes.  However
the texture coordinates for the LOGICAL backend instructions, which
are a common target for the optimization, are combined into offsets over
a single VGRF, so we can't easily identify the constant cases.  The
copy-prop pass makes this more visible for opt_zero_samples.

Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25742>

---

 src/intel/compiler/brw_fs.cpp     | 15 ++++++++-------
 src/intel/compiler/brw_fs_nir.cpp | 24 ++++++++++++++++--------
 src/intel/compiler/brw_ir_fs.h    |  1 +
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index ec9d3a9b15d..f46f394658b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3108,13 +3108,6 @@ fs_visitor::opt_zero_samples()
    /* Implementation supports only SENDs, so applicable to Gfx7+ only. */
    assert(devinfo->ver >= 7);
 
-   /* Gfx12.5 has restrictions on the number of coordinate
-    * parameters that have to be provided for some texture types
-    * (Wa_14012688258).
-    */
-   if (intel_needs_workaround(devinfo, 14012688258))
-      return false;
-
    bool progress = false;
 
    foreach_block_and_inst(block, fs_inst, send, cfg) {
@@ -3122,6 +3115,14 @@ fs_visitor::opt_zero_samples()
           send->sfid != BRW_SFID_SAMPLER)
          continue;
 
+      /* Wa_14012688258:
+       *
+       * Don't trim zeros at the end of payload for sample operations
+       * in cube and cube arrays.
+       */
+      if (send->keep_payload_trailing_zeros)
+         continue;
+
       /* This pass works on SENDs before splitting. */
       if (send->ex_mlen > 0)
          continue;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index add1f62afae..01157de9a74 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -6474,14 +6474,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
             srcs[TEX_LOGICAL_SRC_COORDINATE] = retype(src, BRW_REGISTER_TYPE_F);
             break;
          }
-
-         /* Wa_14012688258:
-          *
-          * Compiler should send U,V,R parameters even if V,R are 0.
-          */
-         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-             intel_needs_workaround(devinfo, 14012688258))
-            assert(instr->coord_components >= 3u);
          break;
       case nir_tex_src_ddx:
          srcs[TEX_LOGICAL_SRC_LOD] = retype(src, BRW_REGISTER_TYPE_F);
@@ -6723,6 +6715,22 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
       inst->shadow_compare = true;
 
+   /* Wa_14012688258:
+    *
+    * Don't trim zeros at the end of payload for sample operations
+    * in cube and cube arrays.
+    */
+   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+       intel_needs_workaround(devinfo, 14012688258)) {
+
+      /* Compiler should send U,V,R parameters even if V,R are 0. */
+      if (srcs[TEX_LOGICAL_SRC_COORDINATE].file != BAD_FILE)
+         assert(instr->coord_components >= 3u);
+
+      /* See opt_zero_samples(). */
+      inst->keep_payload_trailing_zeros = true;
+   }
+
    fs_reg nir_dest[5];
    for (unsigned i = 0; i < dest_size; i++)
       nir_dest[i] = offset(dst, bld, i);
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index c7215caf45f..7ef815a7601 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -426,6 +426,7 @@ public:
 
    bool last_rt:1;
    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
+   bool keep_payload_trailing_zeros;
 
    tgl_swsb sched; /**< Scheduling info. */
 };

Reply via email to