Module: Mesa
Branch: master
Commit: e0dff5fd86179b4d265060d5fc6138bb6a50b54d
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e0dff5fd86179b4d265060d5fc6138bb6a50b54d

Author: Timur Kristóf <[email protected]>
Date:   Thu Mar 12 17:20:16 2020 +0100

aco: Create null exports in instruction selection instead of assembler.

This allows the passes after isel to assume that the exports are
always correct, and also allows to schedule these null exports later.
Additionally, it ensures that the correct exec mask is used for
these exports.

Totals from affected shaders (GFX10):
SGPRS: 84224 -> 84344 (0.14 %)
VGPRS: 23088 -> 23076 (-0.05 %)
Code Size: 882892 -> 894368 (1.30 %) bytes

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>

---

 src/amd/compiler/aco_assembler.cpp             | 25 ++++--------
 src/amd/compiler/aco_instruction_selection.cpp | 55 +++++++++++++++++++-------
 src/amd/compiler/aco_lower_to_hw_instr.cpp     | 28 +++++++++++--
 3 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp 
b/src/amd/compiler/aco_assembler.cpp
index 7683c8f45af..9544fc35bd4 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -595,11 +595,11 @@ void emit_block(asm_context& ctx, std::vector<uint32_t>& 
out, Block& block)
 
 void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* 
program)
 {
+   bool exported = false;
    for (Block& block : program->blocks) {
       if (!(block.kind & block_kind_export_end))
          continue;
       std::vector<aco_ptr<Instruction>>::reverse_iterator it = 
block.instructions.rbegin();
-      bool exported = false;
       while ( it != block.instructions.rend())
       {
          if ((*it)->format == Format::EXP) {
@@ -620,22 +620,13 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& 
out, Program* program)
             break;
          ++it;
       }
-      if (exported)
-         continue;
-      /* we didn't find an Export instruction and have to insert a null export 
*/
-      aco_ptr<Export_instruction> 
exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
-      for (unsigned i = 0; i < 4; i++)
-         exp->operands[i] = Operand(v1);
-      exp->enabled_mask = 0;
-      exp->compressed = false;
-      exp->done = true;
-      exp->valid_mask = (program->stage & hw_fs) || program->chip_class >= 
GFX10;
-      if (program->stage & hw_fs)
-         exp->dest = 9; /* NULL */
-      else
-         exp->dest = V_008DFC_SQ_EXP_POS;
-      /* insert the null export 1 instruction before branch/endpgm */
-      block.instructions.insert(block.instructions.end() - 1, std::move(exp));
+   }
+
+   if (!exported) {
+      /* Abort in order to avoid a GPU hang. */
+      fprintf(stderr, "Missing export in %s shader:\n", (program->stage & 
hw_vs) ? "vertex" : "fragment");
+      aco_print_program(program, stderr);
+      abort();
    }
 }
 
diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp
index a8b318c63c5..b9d203a6391 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8981,7 +8981,20 @@ static bool visit_cf_list(isel_context *ctx,
    return false;
 }
 
-static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int 
*next_pos)
+static void create_null_export(isel_context *ctx)
+{
+   /* Some shader stages always need to have exports.
+    * So when there is none, we need to add a null export.
+    */
+
+   unsigned dest = (ctx->program->stage & hw_fs) ? 9 /* NULL */ : 
V_008DFC_SQ_EXP_POS;
+   bool vm = (ctx->program->stage & hw_fs) || ctx->program->chip_class >= 
GFX10;
+   Builder bld(ctx->program, ctx->block);
+   bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
+           /* enabled_mask */ 0, dest, /* compr */ false, /* done */ true, vm);
+}
+
+static bool export_vs_varying(isel_context *ctx, int slot, bool is_pos, int 
*next_pos)
 {
    assert(ctx->stage == vertex_vs ||
           ctx->stage == tess_eval_vs ||
@@ -8992,9 +9005,9 @@ static void export_vs_varying(isel_context *ctx, int 
slot, bool is_pos, int *nex
                 : ctx->program->info->vs.outinfo.vs_output_param_offset[slot];
    uint64_t mask = ctx->outputs.mask[slot];
    if (!is_pos && !mask)
-      return;
+      return false;
    if (!is_pos && offset == AC_EXP_PARAM_UNDEFINED)
-      return;
+      return false;
    aco_ptr<Export_instruction> 
exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
    exp->enabled_mask = mask;
    for (unsigned i = 0; i < 4; ++i) {
@@ -9014,6 +9027,8 @@ static void export_vs_varying(isel_context *ctx, int 
slot, bool is_pos, int *nex
    else
       exp->dest = V_008DFC_SQ_EXP_PARAM + offset;
    ctx->block->instructions.emplace_back(std::move(exp));
+
+   return true;
 }
 
 static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos)
@@ -9075,14 +9090,15 @@ static void create_vs_exports(isel_context *ctx)
 
    /* the order these position exports are created is important */
    int next_pos = 0;
-   export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
+   bool exported_pos = export_vs_varying(ctx, VARYING_SLOT_POS, true, 
&next_pos);
    if (outinfo->writes_pointsize || outinfo->writes_layer || 
outinfo->writes_viewport_index) {
       export_vs_psiz_layer_viewport(ctx, &next_pos);
+      exported_pos = true;
    }
    if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
-      export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos);
+      exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, 
&next_pos);
    if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
-      export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
+      exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, 
&next_pos);
 
    if (ctx->export_clip_dists) {
       if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
@@ -9098,9 +9114,12 @@ static void create_vs_exports(isel_context *ctx)
 
       export_vs_varying(ctx, i, false, NULL);
    }
+
+   if (!exported_pos)
+      create_null_export(ctx);
 }
 
-static void export_fs_mrt_z(isel_context *ctx)
+static bool export_fs_mrt_z(isel_context *ctx)
 {
    Builder bld(ctx->program, ctx->block);
    unsigned enabled_channels = 0;
@@ -9157,9 +9176,11 @@ static void export_fs_mrt_z(isel_context *ctx)
 
    bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3],
            enabled_channels, V_008DFC_SQ_EXP_MRTZ, compr);
+
+   return true;
 }
 
-static void export_fs_mrt_color(isel_context *ctx, int slot)
+static bool export_fs_mrt_color(isel_context *ctx, int slot)
 {
    Builder bld(ctx->program, ctx->block);
    unsigned write_mask = ctx->outputs.mask[slot];
@@ -9276,7 +9297,7 @@ static void export_fs_mrt_color(isel_context *ctx, int 
slot)
    }
 
    if (target == V_008DFC_SQ_EXP_NULL)
-      return;
+      return false;
 
    if ((bool) compr_op) {
       for (int i = 0; i < 2; i++) {
@@ -9300,22 +9321,26 @@ static void export_fs_mrt_color(isel_context *ctx, int 
slot)
 
    bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3],
            enabled_channels, target, (bool) compr_op);
+   return true;
 }
 
 static void create_fs_exports(isel_context *ctx)
 {
+   bool exported = false;
+
    /* Export depth, stencil and sample mask. */
    if (ctx->outputs.mask[FRAG_RESULT_DEPTH] ||
        ctx->outputs.mask[FRAG_RESULT_STENCIL] ||
-       ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK]) {
-      export_fs_mrt_z(ctx);
-   }
+       ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK])
+      exported |= export_fs_mrt_z(ctx);
 
    /* Export all color render targets. */
-   for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
+   for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
       if (ctx->outputs.mask[i])
-         export_fs_mrt_color(ctx, i);
-   }
+         exported |= export_fs_mrt_color(ctx, i);
+
+   if (!exported)
+      create_null_export(ctx);
 }
 
 static void write_tcs_tess_factors(isel_context *ctx)
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp 
b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index f42484c502f..6f2f54f6992 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -969,10 +969,30 @@ void lower_to_hw_instr(Program* program)
             }
             case aco_opcode::p_exit_early_if:
             {
-               /* don't bother with an early exit at the end of the program */
-               if (block->instructions[j + 1]->opcode == 
aco_opcode::p_logical_end &&
-                   block->instructions[j + 2]->opcode == aco_opcode::s_endpgm) 
{
-                  break;
+               /* don't bother with an early exit near the end of the program 
*/
+               if ((block->instructions.size() - 1 - j) <= 4 &&
+                    block->instructions.back()->opcode == 
aco_opcode::s_endpgm) {
+                  unsigned null_exp_dest = (ctx.program->stage & hw_fs) ? 9 /* 
NULL */ : V_008DFC_SQ_EXP_POS;
+                  bool ignore_early_exit = true;
+
+                  for (unsigned k = j + 1; k < block->instructions.size(); 
++k) {
+                     const aco_ptr<Instruction> &instr = 
block->instructions[k];
+                     if (instr->opcode == aco_opcode::s_endpgm ||
+                         instr->opcode == aco_opcode::p_logical_end)
+                        continue;
+                     else if (instr->opcode == aco_opcode::exp &&
+                              static_cast<Export_instruction 
*>(instr.get())->dest == null_exp_dest)
+                        continue;
+                     else if (instr->opcode == aco_opcode::p_parallelcopy &&
+                         instr->definitions[0].isFixed() &&
+                         instr->definitions[0].physReg() == exec)
+                        continue;
+
+                     ignore_early_exit = false;
+                  }
+
+                  if (ignore_early_exit)
+                     break;
                }
 
                if (!discard_block) {

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to