Module: Mesa
Branch: main
Commit: ebba3cad8160e17f7c77a58c43c66d4f1ccc0d81
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ebba3cad8160e17f7c77a58c43c66d4f1ccc0d81

Author: Jason Ekstrand <[email protected]>
Date:   Sun May  2 17:19:02 2021 -0500

intel/vec4: Add support for UBO pushing

Shader-db results on Haswell (vec4 only):

    total instructions in shared programs: 2853928 -> 2726576 (-4.46%)
    instructions in affected programs: 855840 -> 728488 (-14.88%)
    helped: 9500
    HURT: 18
    helped stats (abs) min: 1 max: 359 x̄: 13.54 x̃: 11
    helped stats (rel) min: 0.44% max: 53.33% x̄: 19.13% x̃: 17.44%
    HURT stats (abs)   min: 4 max: 124 x̄: 71.00 x̃: 92
    HURT stats (rel)   min: 3.64% max: 77.86% x̄: 46.43% x̃: 52.12%
    95% mean confidence interval for instructions value: -13.78 -12.98
    95% mean confidence interval for instructions %-change: -19.21% -18.81%
    Instructions are helped.

    total cycles in shared programs: 101822616 -> 60245580 (-40.83%)
    cycles in affected programs: 93312382 -> 51735346 (-44.56%)
    helped: 13292
    HURT: 4506
    helped stats (abs) min: 2 max: 1229260 x̄: 3370.82 x̃: 776
    helped stats (rel) min: 0.04% max: 96.70% x̄: 47.56% x̃: 43.76%
    HURT stats (abs)   min: 2 max: 17644 x̄: 716.37 x̃: 82
    HURT stats (rel)   min: 0.02% max: 491.80% x̄: 41.00% x̃: 11.11%
    95% mean confidence interval for cycles value: -3037.07 -1635.03
    95% mean confidence interval for cycles %-change: -26.03% -24.25%
    Cycles are helped.

    total spills in shared programs: 1080 -> 1314 (21.67%)
    spills in affected programs: 74 -> 308 (316.22%)
    helped: 0
    HURT: 47

    total fills in shared programs: 310 -> 497 (60.32%)
    fills in affected programs: 71 -> 258 (263.38%)
    helped: 0
    HURT: 47

    total sends in shared programs: 239884 -> 151799 (-36.72%)
    sends in affected programs: 129302 -> 41217 (-68.12%)
    helped: 9547
    HURT: 0
    helped stats (abs) min: 1 max: 226 x̄: 9.23 x̃: 8
    helped stats (rel) min: 3.12% max: 98.15% x̄: 72.38% x̃: 80.00%
    95% mean confidence interval for sends value: -9.48 -8.98
    95% mean confidence interval for sends %-change: -72.80% -71.97%
    Sends are helped.

Reviewed-by: Kenneth Graunke <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10571>

---

 src/intel/compiler/brw_fs.h                     |  2 --
 src/intel/compiler/brw_nir_analyze_ubo_ranges.c |  3 +--
 src/intel/compiler/brw_shader.h                 |  2 ++
 src/intel/compiler/brw_vec4.cpp                 | 26 ++++++++++++++-------
 src/intel/compiler/brw_vec4_nir.cpp             | 31 +++++++++++++++++++++----
 5 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 8e4212d2273..57193a12b32 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -78,8 +78,6 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
    return offset(reg, bld.dispatch_width(), delta);
 }
 
-#define UBO_START ((1 << 16) - 4)
-
 struct shader_stats {
    const char *scheduler_mode;
    unsigned promoted_constants;
diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
index 34568ac69c9..2fa4dd5908a 100644
--- a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
+++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
@@ -202,8 +202,7 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
 {
    const struct intel_device_info *devinfo = compiler->devinfo;
 
-   if ((devinfo->verx10 <= 70) ||
-       !compiler->scalar_stage[nir->info.stage]) {
+   if (devinfo->verx10 <= 70) {
       memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range));
       return;
    }
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 7b7035293ed..20e9281b7d0 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -40,6 +40,8 @@ enum instruction_scheduler_mode {
    SCHEDULE_POST,
 };
 
+#define UBO_START ((1 << 16) - 4)
+
 struct backend_shader {
 protected:
 
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 4e215c88b0b..56031942fb0 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -593,7 +593,7 @@ vec4_visitor::split_uniform_registers()
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (int i = 0 ; i < 3; i++) {
-        if (inst->src[i].file != UNIFORM)
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
            continue;
 
         assert(!inst->src[i].reladdr);
@@ -672,7 +672,7 @@ vec4_visitor::pack_uniform_registers()
       }
 
       for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM)
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
             continue;
 
          assert(type_sz(inst->src[i].type) % 4 == 0);
@@ -782,7 +782,7 @@ vec4_visitor::pack_uniform_registers()
       for (int i = 0 ; i < 3; i++) {
          int src = inst->src[i].nr;
 
-         if (inst->src[i].file != UNIFORM)
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
             continue;
 
          int chan = new_chan[src] / channel_sizes[src];
@@ -977,7 +977,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
     */
    foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
       for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM ||
+         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START ||
              pull_constant_loc[inst->src[i].nr] == -1)
             continue;
 
@@ -2078,11 +2078,19 @@ vec4_visitor::convert_to_hw_regs()
          }
 
          case UNIFORM: {
-            reg = stride(byte_offset(brw_vec4_grf(
-                                        prog_data->base.dispatch_grf_start_reg +
-                                        src.nr / 2, src.nr % 2 * 4),
-                                     src.offset),
-                         0, 4, 1);
+            if (src.nr >= UBO_START) {
+               reg = byte_offset(brw_vec4_grf(
+                                    prog_data->base.dispatch_grf_start_reg +
+                                    ubo_push_start[src.nr - UBO_START] +
+                                    src.offset / 32, 0),
+                                 src.offset % 32);
+            } else {
+               reg = byte_offset(brw_vec4_grf(
+                                    prog_data->base.dispatch_grf_start_reg +
+                                    src.nr / 2, src.nr % 2 * 4),
+                                 src.offset);
+            }
+            reg = stride(reg, 0, 4, 1);
             reg.type = src.type;
             reg.abs = src.abs;
             reg.negate = src.negate;
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 636dbfb6ea0..4c5dfd00f84 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -624,8 +624,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_ubo: {
       src_reg surf_index;
 
-      prog_data->base.has_ubo_pull = true;
-
       dest = get_nir_dest(instr->dest);
 
       if (nir_src_is_const(instr->src[0])) {
@@ -647,10 +645,31 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          surf_index = emit_uniformize(surf_index);
       }
 
+      src_reg push_reg;
       src_reg offset_reg;
       if (nir_src_is_const(instr->src[1])) {
          unsigned load_offset = nir_src_as_uint(instr->src[1]);
-         offset_reg = brw_imm_ud(load_offset & ~15);
+         unsigned aligned_offset = load_offset & ~15;
+         offset_reg = brw_imm_ud(aligned_offset);
+
+         /* See if we've selected this as a push constant candidate */
+         if (nir_src_is_const(instr->src[0])) {
+            const unsigned ubo_block = nir_src_as_uint(instr->src[0]);
+            const unsigned offset_256b = aligned_offset / 32;
+
+            for (int i = 0; i < 4; i++) {
+               const struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
+               if (range->block == ubo_block &&
+                   offset_256b >= range->start &&
+                   offset_256b < range->start + range->length) {
+
+                  push_reg = src_reg(dst_reg(UNIFORM, UBO_START + i));
+                  push_reg.type = dest.type;
+                  push_reg.offset = aligned_offset - 32 * range->start;
+                  break;
+               }
+            }
+         }
       } else {
          offset_reg = src_reg(this, glsl_type::uint_type);
          emit(MOV(dst_reg(offset_reg),
@@ -658,12 +677,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       }
 
       src_reg packed_consts;
-      if (nir_dest_bit_size(instr->dest) == 32) {
+      if (push_reg.file != BAD_FILE) {
+         packed_consts = push_reg;
+      } else if (nir_dest_bit_size(instr->dest) == 32) {
          packed_consts = src_reg(this, glsl_type::vec4_type);
          emit_pull_constant_load_reg(dst_reg(packed_consts),
                                      surf_index,
                                      offset_reg,
                                      NULL, NULL /* before_block/inst */);
+         prog_data->base.has_ubo_pull = true;
       } else {
          src_reg temp = src_reg(this, glsl_type::dvec4_type);
          src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
@@ -676,6 +698,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
             emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
          emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)),
                                      surf_index, offset_reg, NULL, NULL);
+         prog_data->base.has_ubo_pull = true;
 
          packed_consts = src_reg(this, glsl_type::dvec4_type);
          shuffle_64bit_data(dst_reg(packed_consts), temp, false);

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to