Module: Mesa
Branch: main
Commit: 57b0f1958292c36340994600961e41bfe5e90dff
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57b0f1958292c36340994600961e41bfe5e90dff

Author: Qiang Yu <[email protected]>
Date:   Fri Aug 11 16:58:36 2023 +0800

aco: add create_fs_end_for_epilog for radeonsi

Reviewed-by: Rhys Perry <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24973>

---

 src/amd/compiler/aco_instruction_selection.cpp | 60 +++++++++++++++++++++++++-
 src/amd/compiler/aco_shader_info.h             |  7 ++-
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp
index a63fdd0156d..9460c095a24 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11139,6 +11139,61 @@ create_tcs_end_for_epilog(isel_context* ctx)
    build_end_with_regs(ctx, regs);
 }
 
+static void
+create_fs_end_for_epilog(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   std::vector<Operand> regs;
+
+   regs.emplace_back(get_arg_for_end(ctx, 
ctx->program->info.ps.alpha_reference));
+
+   unsigned vgpr = 256;
+
+   for (unsigned slot = FRAG_RESULT_DATA0; slot <= FRAG_RESULT_DATA7; slot++) {
+      unsigned index = slot - FRAG_RESULT_DATA0;
+      unsigned type = (ctx->output_color_types >> (index * 2)) & 0x3;
+      unsigned write_mask = ctx->outputs.mask[slot];
+
+      if (!write_mask)
+         continue;
+
+      if (type == ACO_TYPE_ANY32) {
+         u_foreach_bit (i, write_mask) {
+            regs.emplace_back(Operand(ctx->outputs.temps[slot * 4 + i], 
PhysReg{vgpr + i}));
+         }
+      } else {
+         for (unsigned i = 0; i < 2; i++) {
+            unsigned mask = (write_mask >> (i * 2)) & 0x3;
+            if (!mask)
+               continue;
+
+            unsigned chan = slot * 4 + i * 2;
+            Operand lo = mask & 0x1 ? Operand(ctx->outputs.temps[chan]) : 
Operand(v2b);
+            Operand hi = mask & 0x2 ? Operand(ctx->outputs.temps[chan + 1]) : 
Operand(v2b);
+
+            Temp dst = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), 
lo, hi);
+            regs.emplace_back(Operand(dst, PhysReg{vgpr + i}));
+         }
+      }
+      vgpr += 4;
+   }
+
+   if (ctx->outputs.mask[FRAG_RESULT_DEPTH])
+      regs.emplace_back(Operand(ctx->outputs.temps[FRAG_RESULT_DEPTH * 4], 
PhysReg{vgpr++}));
+
+   if (ctx->outputs.mask[FRAG_RESULT_STENCIL])
+      regs.emplace_back(Operand(ctx->outputs.temps[FRAG_RESULT_STENCIL * 4], 
PhysReg{vgpr++}));
+
+   if (ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK])
+      regs.emplace_back(Operand(ctx->outputs.temps[FRAG_RESULT_SAMPLE_MASK * 
4], PhysReg{vgpr++}));
+
+   build_end_with_regs(ctx, regs);
+
+   /* Exit WQM mode finally. */
+   ctx->program->needs_exact = true;
+}
+
 Pseudo_instruction*
 add_startpgm(struct isel_context* ctx)
 {
@@ -11661,7 +11716,10 @@ select_shader(isel_context& ctx, nir_shader* nir, 
const bool need_startpgm, cons
 
    if (ctx.program->info.has_epilog) {
       if (ctx.stage == fragment_fs) {
-         create_fs_jump_to_epilog(&ctx);
+         if (ctx.options->is_opengl)
+            create_fs_end_for_epilog(&ctx);
+         else
+            create_fs_jump_to_epilog(&ctx);
 
          /* FS epilogs always have at least one color/null export. */
          ctx.program->has_color_exports = true;
diff --git a/src/amd/compiler/aco_shader_info.h 
b/src/amd/compiler/aco_shader_info.h
index 1b491023dd3..d2b6335f040 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -158,9 +158,14 @@ struct aco_shader_info {
       struct ac_arg vs_state_bits;
    } tcs;
    struct {
-      struct ac_arg epilog_pc;
       uint32_t num_interp;
       unsigned spi_ps_input;
+
+      /* Vulkan only */
+      struct ac_arg epilog_pc;
+
+      /* OpenGL only */
+      struct ac_arg alpha_reference;
    } ps;
    struct {
       uint8_t subgroup_size;

Reply via email to