There are three simple heuristics for SIMD32 shader enabling:

- How many MRTs does the shader write into?
- How many grouped texture fetches does the shader have?
- How many instructions does the SIMD32 shader have compared to the SIMD16
   shader?

For testing purposes, the heuristics can be controlled via these environment
variables:

simd32_heuristic_mrt_check
- Enables MRT write check
- Default: true

simd32_heuristic_max_mrts
- How many MRT writes the heuristic allows
- Default: 1

simd32_heuristic_grouped_check
- Enables grouped texture fetch check
- Default: true

simd32_heuristic_grouped_sends
- How many grouped texture fetches the heuristic allows
- Default: 6

simd32_heuristic_inst_check
- Enables SIMD32 vs. SIMD16 instruction count check
- Default: true

simd32_heuristic_inst_ratio
- SIMD32 vs. SIMD16 instruction count ratio the heuristic allows
- Default: 2.3

SIMD32 shaders will not be compiled also when SIMD16 compilation fails or
spills.
---
 src/intel/compiler/brw_fs.cpp | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 02e151f..5cceb6c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7120,6 +7120,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
                char **error_str)
 {
    const struct gen_device_info *devinfo = compiler->devinfo;
+   bool simd16_failed = false;
+   bool simd16_spilled = false;
 
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
    shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
@@ -7187,10 +7189,12 @@ brw_compile_fs(const struct brw_compiler *compiler, 
void *log_data,
                      shader_time_index16);
       v16.import_uniforms(&v8);
       if (!v16.run_fs(allow_spilling, use_rep_send)) {
+         simd16_failed = true;
          compiler->shader_perf_log(log_data,
                                    "SIMD16 shader failed to compile: %s",
                                    v16.fail_msg);
       } else {
+         simd16_spilled = v16.spilled_any_registers;
          simd16_cfg = v16.cfg;
          prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
          prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
@@ -7198,9 +7202,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
    }
 
    /* Currently, the compiler only supports SIMD32 on SNB+ */
+   const brw_simd32_heuristics_control *ctrl = 
&compiler->simd32_heuristics_control;
+   uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;
+
    if (v8.max_dispatch_width >= 32 && !use_rep_send &&
        compiler->devinfo->gen >= 6 &&
-       unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+       (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+        (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+         !simd16_failed && !simd16_spilled &&
+         (!ctrl->mrt_check ||
+          (ctrl->mrt_check &&
+          u_count_bits64(&mrts) <= ctrl->max_mrts))))) {
       /* Try a SIMD32 compile */
       fs_visitor v32(compiler, log_data, mem_ctx, key,
                      &prog_data->base, prog, shader, 32,
@@ -7211,9 +7223,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
                                    "SIMD32 shader failed to compile: %s",
                                    v32.fail_msg);
       } else {
-         simd32_cfg = v32.cfg;
-         prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
-         prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+         if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
+              v32.run_heuristic(ctrl)) {
+            simd32_cfg = v32.cfg;
+            prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
+            prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+         }
       }
    }
 
@@ -7292,8 +7307,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
    }
 
    if (simd32_cfg) {
-      prog_data->dispatch_32 = true;
-      prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
+      uint32_t offset = g.generate_code(simd32_cfg, 32);
+
+      if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+          (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+           (!simd16_cfg ||
+            (simd16_cfg &&
+             (!ctrl->inst_count_check ||
+             (ctrl->inst_count_check &&
+             (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= 
ctrl->inst_count_ratio)))))) {
+         prog_data->dispatch_32 = true;
+         prog_data->prog_offset_32 = offset;
+      }
    }
 
    return g.get_assembly();
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to