Module: Mesa
Branch: main
Commit: daeab51a628a61dade7bac0c6d06361714f57e1b
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=daeab51a628a61dade7bac0c6d06361714f57e1b

Author: Caio Oliveira <caio.olive...@intel.com>
Date:   Wed Oct 25 21:07:26 2023 -0700

intel/compiler: Re-enable opt_zero_samples() for Gfx7+

Inadvertently, because of a sequence of changes elsewhere, this pass
ended up not having any effect:

- Before Gfx5 the optimization is not applicable.

- On Gfx5-6 it doesn't apply because sampler operations don't
  currently use LOAD_PAYLOAD, but write the MOVs directly.  Not clear to
  me whether they ever did.

- On Gfx7+ it doesn't apply anymore because the logical sampler
  operations are now lowered directly to SENDs, and the is_tex() check
  would skip them.

Since the LOAD_PAYLOAD implementation applies for Gfx7+ only, rework the
pass to work again by handling SEND instructions.  To make the pass
easier, the optimization will happen before opt_split_sends() so only
one LOAD_PAYLOAD needs to be cared for.

Update the code to accept BAD_FILE sources in addition to zeros.  These
are added in some cases as padding and are effectively don't-care
values, so we can treat them as zeros.

Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25742>

---

 src/intel/compiler/brw_fs.cpp | 63 ++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index a794ed452a2..ec9d3a9b15d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3096,7 +3096,7 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
 
 /**
  * Optimize sample messages that have constant zero values for the trailing
- * texture coordinates. We can just reduce the message length for these
+ * parameters. We can just reduce the message length for these
  * instructions instead of reserving a register for it. Trailing parameters
  * that aren't sent default to zero anyway. This will cause the dead code
  * eliminator to remove the MOV instruction that would otherwise be emitted to
@@ -3105,26 +3105,36 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
 bool
 fs_visitor::opt_zero_samples()
 {
-   /* Gfx4 infers the texturing opcode based on the message length so we can't
-    * change it.  Gfx12.5 has restrictions on the number of coordinate
+   /* Implementation supports only SENDs, so applicable to Gfx7+ only. */
+   assert(devinfo->ver >= 7);
+
+   /* Gfx12.5 has restrictions on the number of coordinate
     * parameters that have to be provided for some texture types
     * (Wa_14012688258).
     */
-   if (devinfo->ver < 5 || intel_needs_workaround(devinfo, 14012688258))
+   if (intel_needs_workaround(devinfo, 14012688258))
       return false;
 
    bool progress = false;
 
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (!inst->is_tex())
+   foreach_block_and_inst(block, fs_inst, send, cfg) {
+      if (send->opcode != SHADER_OPCODE_SEND ||
+          send->sfid != BRW_SFID_SAMPLER)
+         continue;
+
+      /* This pass works on SENDs before splitting. */
+      if (send->ex_mlen > 0)
          continue;
 
-      fs_inst *load_payload = (fs_inst *) inst->prev;
+      fs_inst *lp = (fs_inst *) send->prev;
 
-      if (load_payload->is_head_sentinel() ||
-          load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
+      if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
          continue;
 
+      /* How much of the payload are actually read by this SEND. */
+      const unsigned params =
+         load_payload_sources_read_for_size(lp, send->mlen * REG_SIZE);
+
       /* We don't want to remove the message header or the first parameter.
        * Removing the first parameter is not allowed, see the Haswell PRM
        * volume 7, page 149:
@@ -3132,11 +3142,17 @@ fs_visitor::opt_zero_samples()
        *     "Parameter 0 is required except for the sampleinfo message, which
        *      has no parameter 0"
        */
-      while (inst->mlen > inst->header_size + inst->exec_size / 8 &&
-             load_payload->src[(inst->mlen - inst->header_size) /
-                               (inst->exec_size / 8) +
-                               inst->header_size - 1].is_zero()) {
-         inst->mlen -= inst->exec_size / 8;
+      const unsigned first_param_idx = lp->header_size;
+      unsigned zero_size = 0;
+      for (unsigned i = params - 1; i > first_param_idx; i--) {
+         if (lp->src[i].file != BAD_FILE && !lp->src[i].is_zero())
+            break;
+         zero_size += lp->exec_size * type_sz(lp->src[i].type) * 
lp->dst.stride;
+      }
+
+      const unsigned zero_len = zero_size / (reg_unit(devinfo) * REG_SIZE);
+      if (zero_len > 0) {
+         send->mlen -= zero_len;
          progress = true;
       }
    }
@@ -6382,7 +6398,18 @@ fs_visitor::optimize()
    OPT(lower_logical_sends);
 
    /* After logical SEND lowering. */
-   OPT(opt_copy_propagation);
+
+   if (OPT(opt_copy_propagation))
+      OPT(opt_algebraic);
+
+   /* Identify trailing zeros LOAD_PAYLOAD of sampler messages.
+    * Do this before splitting SENDs.
+    */
+   if (devinfo->ver >= 7) {
+      if (OPT(opt_zero_samples) && OPT(opt_copy_propagation))
+         OPT(opt_algebraic);
+   }
+
    OPT(opt_split_sends);
    OPT(fixup_nomask_control_flow);
 
@@ -6390,12 +6417,6 @@ fs_visitor::optimize()
       if (OPT(opt_copy_propagation))
          OPT(opt_algebraic);
 
-      /* Only run after logical send lowering because it's easier to implement
-       * in terms of physical sends.
-       */
-      if (OPT(opt_zero_samples) && OPT(opt_copy_propagation))
-         OPT(opt_algebraic);
-
       /* Run after logical send lowering to give it a chance to CSE the
        * LOAD_PAYLOAD instructions created to construct the payloads of
        * e.g. texturing messages in cases where it wasn't possible to CSE the

Reply via email to