Module: Mesa
Branch: main
Commit: 9972a385fb363a877a6107528f90d70cf71ef877
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9972a385fb363a877a6107528f90d70cf71ef877

Author: Qiang Yu <[email protected]>
Date:   Tue Aug 15 17:27:48 2023 +0800

aco: simplify export_fs_mrt_color

It's now used by ps epilog only.

Reviewed-by: Rhys Perry <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24973>

---

 src/amd/compiler/aco_instruction_selection.cpp | 133 +++++++++----------------
 1 file changed, 48 insertions(+), 85 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4273bf2cfb7..a925c5d1f70 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10662,18 +10662,6 @@ visit_cf_list(isel_context* ctx, struct exec_list* list)
    return false;
 }
 
-struct mrt_color_export {
-   int slot;
-   unsigned write_mask;
-   Operand values[4];
-   uint8_t col_format;
-
-   /* Fields below are only used for PS epilogs. */
-   bool is_int8;
-   bool is_int10;
-   bool enable_mrt_output_nan_fixup;
-};
-
 static void
 export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
 {
@@ -10686,32 +10674,36 @@ export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
 }
 
 static bool
-export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
-                    struct aco_export_mrt* mrt)
+export_fs_mrt_color(isel_context* ctx, const struct aco_ps_epilog_info* info, Temp colors[4],
+                    unsigned slot, struct aco_export_mrt* mrt)
 {
+   unsigned col_format = (info->spi_shader_col_format >> (slot * 4)) & 0xf;
+
+   if (col_format == V_028714_SPI_SHADER_ZERO)
+      return false;
+
    Builder bld(ctx->program, ctx->block);
    Operand values[4];
 
    for (unsigned i = 0; i < 4; ++i) {
-      values[i] = out->values[i];
+      values[i] = Operand(colors[i]);
    }
 
-   unsigned target;
+   unsigned target = V_008DFC_SQ_EXP_MRT + slot;
    unsigned enabled_channels = 0;
    aco_opcode compr_op = aco_opcode::num_opcodes;
    bool compr = false;
-   bool is_16bit = values[0].regClass() == v2b;
-
-   target = V_008DFC_SQ_EXP_MRT + out->slot;
+   bool is_16bit = colors[0].regClass() == v2b;
+   bool is_int8 = (info->color_is_int8 >> slot) & 1;
+   bool is_int10 = (info->color_is_int10 >> slot) & 1;
+   bool enable_mrt_output_nan_fixup = (ctx->options->enable_mrt_output_nan_fixup >> slot) & 1;
 
    /* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
-   if (out->enable_mrt_output_nan_fixup && !is_16bit &&
-       (out->col_format == V_028714_SPI_SHADER_32_R ||
-        out->col_format == V_028714_SPI_SHADER_32_GR ||
-        out->col_format == V_028714_SPI_SHADER_32_AR ||
-        out->col_format == V_028714_SPI_SHADER_32_ABGR ||
-        out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
-      u_foreach_bit (i, out->write_mask) {
+   if (enable_mrt_output_nan_fixup && !is_16bit &&
+       (col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR ||
+        col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR ||
+        col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+      for (unsigned i = 0; i < 4; i++) {
          Temp is_not_nan =
             bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]);
          values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i],
@@ -10719,7 +10711,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
       }
    }
 
-   switch (out->col_format) {
+   switch (col_format) {
    case V_028714_SPI_SHADER_32_R: enabled_channels = 1; break;
 
    case V_028714_SPI_SHADER_32_GR: enabled_channels = 0x3; break;
@@ -10737,31 +10729,20 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
 
    case V_028714_SPI_SHADER_FP16_ABGR:
       for (int i = 0; i < 2; i++) {
-         bool enabled = (out->write_mask >> (i * 2)) & 0x3;
-         if (enabled) {
-            enabled_channels |= 0x3 << (i * 2);
-            if (is_16bit) {
-               values[i] =
-                  bld.pseudo(aco_opcode::p_create_vector, bld.def(v1),
-                             values[i * 2].isUndefined() ? Operand(v2b) : values[i * 2],
-                             values[i * 2 + 1].isUndefined() ? Operand(v2b) : values[i * 2 + 1]);
-            } else if (ctx->options->gfx_level == GFX8 || ctx->options->gfx_level == GFX9) {
-               values[i] =
-                  bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1),
-                           values[i * 2].isUndefined() ? Operand::zero() : values[i * 2],
-                           values[i * 2 + 1].isUndefined() ? Operand::zero() : values[i * 2 + 1]);
-            } else {
-               values[i] =
-                  bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1),
-                           values[i * 2].isUndefined() ? values[i * 2 + 1] : values[i * 2],
-                           values[i * 2 + 1].isUndefined() ? values[i * 2] : values[i * 2 + 1]);
-            }
+         if (is_16bit) {
+            values[i] = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), values[i * 2],
+                                   values[i * 2 + 1]);
+         } else if (ctx->options->gfx_level == GFX8 || ctx->options->gfx_level == GFX9) {
+            values[i] = bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1), values[i * 2],
+                                 values[i * 2 + 1]);
          } else {
-            values[i] = Operand(v1);
+            values[i] = bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1), values[i * 2],
+                                 values[i * 2 + 1]);
          }
       }
       values[2] = Operand(v1);
       values[3] = Operand(v1);
+      enabled_channels = 0xf;
       compr = true;
       break;
 
@@ -10783,17 +10764,17 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
 
    case V_028714_SPI_SHADER_UINT16_ABGR:
       compr_op = aco_opcode::v_cvt_pk_u16_u32;
-      if (out->is_int8 || out->is_int10) {
+      if (is_int8 || is_int10) {
          /* clamp */
-         uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0;
+         uint32_t max_rgb = is_int8 ? 255 : is_int10 ? 1023 : 0;
 
-         u_foreach_bit (i, out->write_mask) {
-            uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb;
+         for (unsigned i = 0; i < 4; i++) {
+            uint32_t max = i == 3 && is_int10 ? 3 : max_rgb;
 
             values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
          }
       } else if (is_16bit) {
-         u_foreach_bit (i, out->write_mask) {
+         for (unsigned i = 0; i < 4; i++) {
             Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false);
             values[i] = Operand(tmp);
          }
@@ -10802,20 +10783,20 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
 
    case V_028714_SPI_SHADER_SINT16_ABGR:
       compr_op = aco_opcode::v_cvt_pk_i16_i32;
-      if (out->is_int8 || out->is_int10) {
+      if (is_int8 || is_int10) {
          /* clamp */
-         uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0;
-         uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0;
+         uint32_t max_rgb = is_int8 ? 127 : is_int10 ? 511 : 0;
+         uint32_t min_rgb = is_int8 ? -128 : is_int10 ? -512 : 0;
 
-         u_foreach_bit (i, out->write_mask) {
-            uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb;
-            uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb;
+         for (unsigned i = 0; i < 4; i++) {
+            uint32_t max = i == 3 && is_int10 ? 1 : max_rgb;
+            uint32_t min = i == 3 && is_int10 ? -2u : min_rgb;
 
             values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]);
             values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
          }
       } else if (is_16bit) {
-         u_foreach_bit (i, out->write_mask) {
+         for (unsigned i = 0; i < 4; i++) {
             Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true);
             values[i] = Operand(tmp);
          }
@@ -10829,20 +10810,11 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
    }
 
    if (compr_op != aco_opcode::num_opcodes) {
-      for (int i = 0; i < 2; i++) {
-         /* check if at least one of the values to be compressed is enabled */
-         bool enabled = (out->write_mask >> (i * 2)) & 0x3;
-         if (enabled) {
-            enabled_channels |= 0x3 << (i * 2);
-            values[i] = bld.vop3(
-               compr_op, bld.def(v1), values[i * 2].isUndefined() ? Operand::zero() : values[i * 2],
-               values[i * 2 + 1].isUndefined() ? Operand::zero() : values[i * 2 + 1]);
-         } else {
-            values[i] = Operand(v1);
-         }
-      }
+      values[0] = bld.vop3(compr_op, bld.def(v1), values[0], values[1]);
+      values[1] = bld.vop3(compr_op, bld.def(v1), values[2], values[3]);
       values[2] = Operand(v1);
       values[3] = Operand(v1);
+      enabled_channels = 0xf;
       compr = true;
    } else if (!compr) {
       for (int i = 0; i < 4; i++)
@@ -12737,27 +12709,18 @@ select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
    uint8_t exported_mrts = 0;
 
    for (unsigned i = 0; i < MAX_DRAW_BUFFERS; i++) {
-      unsigned col_format = (einfo->spi_shader_col_format >> (i * 4)) & 0xf;
-
-      if (col_format == V_028714_SPI_SHADER_ZERO)
+      if (!einfo->colors[i].used)
          continue;
 
-      struct mrt_color_export out;
-
-      out.slot = i;
-      out.write_mask = 0xf;
-      out.col_format = col_format;
-      out.is_int8 = (einfo->color_is_int8 >> i) & 1;
-      out.is_int10 = (einfo->color_is_int10 >> i) & 1;
-      out.enable_mrt_output_nan_fixup = (options->enable_mrt_output_nan_fixup >> i) & 1;
-
       Temp colors = get_arg(&ctx, einfo->colors[i]);
       emit_split_vector(&ctx, colors, 4);
+
+      Temp comps[4];
       for (unsigned c = 0; c < 4; ++c) {
-         out.values[c] = Operand(emit_extract_vector(&ctx, colors, c, v1));
+         comps[c] = emit_extract_vector(&ctx, colors, c, v1);
       }
 
-      if (export_fs_mrt_color(&ctx, &out, &mrts[i])) {
+      if (export_fs_mrt_color(&ctx, einfo, comps, i, &mrts[i])) {
          exported_mrts |= 1 << i;
       }
    }

Reply via email to