Module: Mesa
Branch: main
Commit: 8313016543758dc30c862e686f12a40f32b95f47
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8313016543758dc30c862e686f12a40f32b95f47

Author: Alyssa Rosenzweig <[email protected]>
Date:   Sun Feb 19 23:08:58 2023 -0500

nir/lower_blend: Consume dual stores

Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.

The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower

   x = ...
   primary color = x
   y = ...
   dual color = y

we end up emitting uses of y before it has been defined, something like

   x = ...
   primary color = blend(x, y)
   y = ...

Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct

   x = ...
   y = ...
   primary color = blend(x, y)

lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Reviewed-by: Faith Ekstrand <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>

---

 src/compiler/nir/nir_lower_blend.c | 71 +++++++++++++++++++++++++++++++++++---
 src/compiler/nir/nir_lower_blend.h |  2 --
 src/panfrost/lib/pan_blend.c       | 54 ++++++++++++-----------------
 3 files changed, 89 insertions(+), 38 deletions(-)

diff --git a/src/compiler/nir/nir_lower_blend.c 
b/src/compiler/nir/nir_lower_blend.c
index 5e5e9e4ccdf..d80241aafb7 100644
--- a/src/compiler/nir/nir_lower_blend.c
+++ b/src/compiler/nir/nir_lower_blend.c
@@ -36,6 +36,11 @@
 #include "compiler/nir/nir_format_convert.h"
 #include "nir_lower_blend.h"
 
+struct ctx {
+   const nir_lower_blend_options *options;
+   nir_ssa_def *src1[8];
+};
+
 /* Given processed factors, combine them per a blend function */
 
 static nir_ssa_def *
@@ -364,8 +369,10 @@ nir_blend(
       bconst = nir_load_blend_const_color_rgba(b);
    }
 
-   if (src->bit_size == 16)
+   if (src->bit_size == 16) {
       bconst = nir_f2f16(b, bconst);
+      src1 = nir_f2f16(b, src1);
+   }
 
    /* Fixed-point framebuffers require their inputs clamped. */
    enum pipe_format format = options->format[rt];
@@ -464,7 +471,8 @@ nir_blend_replace_rt(const nir_lower_blend_rt *rt)
 static bool
 nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
 {
-   const nir_lower_blend_options *options = data;
+   struct ctx *ctx = data;
+   const nir_lower_blend_options *options = ctx->options;
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
@@ -479,7 +487,16 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, 
void *data)
    if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
       return false;
 
-   b->cursor = nir_before_instr(instr);
+   /* Only process stores once. Pass flags are cleared by consume_dual_stores 
*/
+   if (instr->pass_flags)
+      return false;
+
+   instr->pass_flags = 1;
+
+   /* Store are sunk to the bottom of the block to ensure that the dual
+    * source colour is already written.
+    */
+   b->cursor = nir_after_block(instr->block);
 
    /* Grab the input color.  We always want 4 channels during blend.  Dead
     * code will clean up any channels we don't need.
@@ -514,7 +531,7 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, 
void *data)
    } else if (!util_format_is_pure_integer(options->format[rt]) &&
               !nir_blend_replace_rt(&options->rt[rt])) {
       assert(!util_format_is_scaled(options->format[rt]));
-      blended = nir_blend(b, options, rt, src, options->src1, dst);
+      blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
    }
 
    /* Apply a colormask if necessary */
@@ -534,6 +551,44 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, 
void *data)
 
    /* Write out the final color instead of the input */
    nir_instr_rewrite_src_ssa(instr, &store->src[0], blended);
+
+   /* Sink to bottom */
+   nir_instr_remove(instr);
+   nir_builder_instr_insert(b, instr);
+   return true;
+}
+
+/*
+ * Dual-source colours are only for blending, so when nir_lower_blend is used,
+ * the dual source store_output is for us (only). Remove dual stores so the
+ * backend doesn't have to deal with them, collecting the sources for blending.
+ */
+static bool
+consume_dual_stores(nir_builder *b, nir_instr *instr, void *data)
+{
+   nir_ssa_def **outputs = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+   if (store->intrinsic != nir_intrinsic_store_output)
+      return false;
+
+   /* While we're here, clear the pass flags for store_outputs, since we'll set
+    * them later.
+    */
+   instr->pass_flags = 0;
+
+   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
+   if (sem.dual_source_blend_index == 0)
+      return false;
+
+   int rt = color_index_for_location(sem.location);
+   assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");
+   assert(store->src[0].is_ssa && "must be SSA");
+
+   outputs[rt] = store->src[0].ssa;
+   nir_instr_remove(instr);
    return true;
 }
 
@@ -547,8 +602,14 @@ nir_lower_blend(nir_shader *shader, const 
nir_lower_blend_options *options)
 {
    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
 
+   struct ctx ctx = {.options = options};
+   nir_shader_instructions_pass(shader, consume_dual_stores,
+                                nir_metadata_block_index |
+                                nir_metadata_dominance,
+                                ctx.src1);
+
    nir_shader_instructions_pass(shader, nir_lower_blend_instr,
                                 nir_metadata_block_index |
                                 nir_metadata_dominance,
-                                (void *)options);
+                                &ctx);
 }
diff --git a/src/compiler/nir/nir_lower_blend.h 
b/src/compiler/nir/nir_lower_blend.h
index 50a46ce7a72..aaf45664125 100644
--- a/src/compiler/nir/nir_lower_blend.h
+++ b/src/compiler/nir/nir_lower_blend.h
@@ -57,8 +57,6 @@ typedef struct {
    bool logicop_enable;
    unsigned logicop_func;
 
-   nir_ssa_def *src1;
-
    /* If set, will use load_blend_const_color_{r,g,b,a}_float instead of
     * load_blend_const_color_rgba */
    bool scalar_blend_const;
diff --git a/src/panfrost/lib/pan_blend.c b/src/panfrost/lib/pan_blend.c
index 865919fda13..cda49b9aa9d 100644
--- a/src/panfrost/lib/pan_blend.c
+++ b/src/panfrost/lib/pan_blend.c
@@ -663,49 +663,41 @@ GENX(pan_blend_create_shader)(const struct 
panfrost_device *dev,
          rt_state->equation.alpha_invert_dst_factor;
    }
 
-   nir_alu_type src_types[] = {src0_type ?: nir_type_float32,
-                               src1_type ?: nir_type_float32};
-
-   /* HACK: workaround buggy TGSI shaders (u_blitter) */
-   for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) {
-      src_types[i] = nir_alu_type_get_base_type(nir_type) |
-                     nir_alu_type_get_type_size(src_types[i]);
-   }
-
    nir_ssa_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
    nir_ssa_def *zero = nir_imm_int(&b, 0);
 
-   nir_ssa_def *s_src[2];
    for (unsigned i = 0; i < 2; ++i) {
-      s_src[i] = nir_load_interpolated_input(
-         &b, 4, nir_alu_type_get_type_size(src_types[i]), pixel, zero,
+      nir_alu_type src_type =
+         (i == 1 ? src1_type : src0_type) ?: nir_type_float32;
+
+      /* HACK: workaround buggy TGSI shaders (u_blitter) */
+      src_type = nir_alu_type_get_base_type(nir_type) |
+                 nir_alu_type_get_type_size(src_type);
+
+      nir_ssa_def *src = nir_load_interpolated_input(
+         &b, 4, nir_alu_type_get_type_size(src_type), pixel, zero,
          .io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
-         .io_semantics.num_slots = 1, .base = i, .dest_type = src_types[i]);
-   }
+         .io_semantics.num_slots = 1, .base = i, .dest_type = src_type);
 
-   /* On Midgard, the blend shader is responsible for format conversion.
-    * As the OpenGL spec requires integer conversions to saturate, we must
-    * saturate ourselves here. On Bifrost and later, the conversion
-    * hardware handles this automatically.
-    */
-   for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
+      /* On Midgard, the blend shader is responsible for format conversion.
+       * As the OpenGL spec requires integer conversions to saturate, we must
+       * saturate ourselves here. On Bifrost and later, the conversion
+       * hardware handles this automatically.
+       */
       nir_alu_type T = nir_alu_type_get_base_type(nir_type);
       bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
-      s_src[i] =
-         nir_convert_with_rounding(&b, s_src[i], src_types[i], nir_type,
-                                   nir_rounding_mode_undef, should_saturate);
+      src = nir_convert_with_rounding(&b, src, T, nir_type,
+                                      nir_rounding_mode_undef, 
should_saturate);
+
+      nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
+                       .src_type = nir_type,
+                       .io_semantics.location = FRAG_RESULT_DATA0 + rt,
+                       .io_semantics.num_slots = 1,
+                       .io_semantics.dual_source_blend_index = i);
    }
 
-   /* Build a trivial blend shader */
-   nir_store_output(&b, s_src[0], zero, .write_mask = BITFIELD_MASK(4),
-                    .src_type = nir_type,
-                    .io_semantics.location = FRAG_RESULT_DATA0 + rt,
-                    .io_semantics.num_slots = 1);
-
    b.shader->info.io_lowered = true;
 
-   options.src1 = s_src[1];
-
    NIR_PASS_V(b.shader, nir_lower_blend, &options);
    nir_shader_instructions_pass(
       b.shader, pan_inline_blend_constants,

Reply via email to