Module: Mesa Branch: main Commit: ebba3cad8160e17f7c77a58c43c66d4f1ccc0d81 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ebba3cad8160e17f7c77a58c43c66d4f1ccc0d81
Author: Jason Ekstrand <[email protected]> Date: Sun May 2 17:19:02 2021 -0500 intel/vec4: Add support for UBO pushing Shader-db results on Haswell (vec4 only): total instructions in shared programs: 2853928 -> 2726576 (-4.46%) instructions in affected programs: 855840 -> 728488 (-14.88%) helped: 9500 HURT: 18 helped stats (abs) min: 1 max: 359 x̄: 13.54 x̃: 11 helped stats (rel) min: 0.44% max: 53.33% x̄: 19.13% x̃: 17.44% HURT stats (abs) min: 4 max: 124 x̄: 71.00 x̃: 92 HURT stats (rel) min: 3.64% max: 77.86% x̄: 46.43% x̃: 52.12% 95% mean confidence interval for instructions value: -13.78 -12.98 95% mean confidence interval for instructions %-change: -19.21% -18.81% Instructions are helped. total cycles in shared programs: 101822616 -> 60245580 (-40.83%) cycles in affected programs: 93312382 -> 51735346 (-44.56%) helped: 13292 HURT: 4506 helped stats (abs) min: 2 max: 1229260 x̄: 3370.82 x̃: 776 helped stats (rel) min: 0.04% max: 96.70% x̄: 47.56% x̃: 43.76% HURT stats (abs) min: 2 max: 17644 x̄: 716.37 x̃: 82 HURT stats (rel) min: 0.02% max: 491.80% x̄: 41.00% x̃: 11.11% 95% mean confidence interval for cycles value: -3037.07 -1635.03 95% mean confidence interval for cycles %-change: -26.03% -24.25% Cycles are helped. total spills in shared programs: 1080 -> 1314 (21.67%) spills in affected programs: 74 -> 308 (316.22%) helped: 0 HURT: 47 total fills in shared programs: 310 -> 497 (60.32%) fills in affected programs: 71 -> 258 (263.38%) helped: 0 HURT: 47 total sends in shared programs: 239884 -> 151799 (-36.72%) sends in affected programs: 129302 -> 41217 (-68.12%) helped: 9547 HURT: 0 helped stats (abs) min: 1 max: 226 x̄: 9.23 x̃: 8 helped stats (rel) min: 3.12% max: 98.15% x̄: 72.38% x̃: 80.00% 95% mean confidence interval for sends value: -9.48 -8.98 95% mean confidence interval for sends %-change: -72.80% -71.97% Sends are helped. 
Reviewed-by: Kenneth Graunke <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10571> --- src/intel/compiler/brw_fs.h | 2 -- src/intel/compiler/brw_nir_analyze_ubo_ranges.c | 3 +-- src/intel/compiler/brw_shader.h | 2 ++ src/intel/compiler/brw_vec4.cpp | 26 ++++++++++++++------- src/intel/compiler/brw_vec4_nir.cpp | 31 +++++++++++++++++++++---- 5 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 8e4212d2273..57193a12b32 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -78,8 +78,6 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta) return offset(reg, bld.dispatch_width(), delta); } -#define UBO_START ((1 << 16) - 4) - struct shader_stats { const char *scheduler_mode; unsigned promoted_constants; diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c index 34568ac69c9..2fa4dd5908a 100644 --- a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c +++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c @@ -202,8 +202,7 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, { const struct intel_device_info *devinfo = compiler->devinfo; - if ((devinfo->verx10 <= 70) || - !compiler->scalar_stage[nir->info.stage]) { + if (devinfo->verx10 <= 70) { memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range)); return; } diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h index 7b7035293ed..20e9281b7d0 100644 --- a/src/intel/compiler/brw_shader.h +++ b/src/intel/compiler/brw_shader.h @@ -40,6 +40,8 @@ enum instruction_scheduler_mode { SCHEDULE_POST, }; +#define UBO_START ((1 << 16) - 4) + struct backend_shader { protected: diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 4e215c88b0b..56031942fb0 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -593,7 +593,7 @@ 
vec4_visitor::split_uniform_registers() */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM) + if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START) continue; assert(!inst->src[i].reladdr); @@ -672,7 +672,7 @@ vec4_visitor::pack_uniform_registers() } for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM) + if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START) continue; assert(type_sz(inst->src[i].type) % 4 == 0); @@ -782,7 +782,7 @@ vec4_visitor::pack_uniform_registers() for (int i = 0 ; i < 3; i++) { int src = inst->src[i].nr; - if (inst->src[i].file != UNIFORM) + if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START) continue; int chan = new_chan[src] / channel_sizes[src]; @@ -977,7 +977,7 @@ vec4_visitor::move_push_constants_to_pull_constants() */ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM || + if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START || pull_constant_loc[inst->src[i].nr] == -1) continue; @@ -2078,11 +2078,19 @@ vec4_visitor::convert_to_hw_regs() } case UNIFORM: { - reg = stride(byte_offset(brw_vec4_grf( - prog_data->base.dispatch_grf_start_reg + - src.nr / 2, src.nr % 2 * 4), - src.offset), - 0, 4, 1); + if (src.nr >= UBO_START) { + reg = byte_offset(brw_vec4_grf( + prog_data->base.dispatch_grf_start_reg + + ubo_push_start[src.nr - UBO_START] + + src.offset / 32, 0), + src.offset % 32); + } else { + reg = byte_offset(brw_vec4_grf( + prog_data->base.dispatch_grf_start_reg + + src.nr / 2, src.nr % 2 * 4), + src.offset); + } + reg = stride(reg, 0, 4, 1); reg.type = src.type; reg.abs = src.abs; reg.negate = src.negate; diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 636dbfb6ea0..4c5dfd00f84 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ 
-624,8 +624,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_ubo: { src_reg surf_index; - prog_data->base.has_ubo_pull = true; - dest = get_nir_dest(instr->dest); if (nir_src_is_const(instr->src[0])) { @@ -647,10 +645,31 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) surf_index = emit_uniformize(surf_index); } + src_reg push_reg; src_reg offset_reg; if (nir_src_is_const(instr->src[1])) { unsigned load_offset = nir_src_as_uint(instr->src[1]); - offset_reg = brw_imm_ud(load_offset & ~15); + unsigned aligned_offset = load_offset & ~15; + offset_reg = brw_imm_ud(aligned_offset); + + /* See if we've selected this as a push constant candidate */ + if (nir_src_is_const(instr->src[0])) { + const unsigned ubo_block = nir_src_as_uint(instr->src[0]); + const unsigned offset_256b = aligned_offset / 32; + + for (int i = 0; i < 4; i++) { + const struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i]; + if (range->block == ubo_block && + offset_256b >= range->start && + offset_256b < range->start + range->length) { + + push_reg = src_reg(dst_reg(UNIFORM, UBO_START + i)); + push_reg.type = dest.type; + push_reg.offset = aligned_offset - 32 * range->start; + break; + } + } + } } else { offset_reg = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(offset_reg), @@ -658,12 +677,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } src_reg packed_consts; - if (nir_dest_bit_size(instr->dest) == 32) { + if (push_reg.file != BAD_FILE) { + packed_consts = push_reg; + } else if (nir_dest_bit_size(instr->dest) == 32) { packed_consts = src_reg(this, glsl_type::vec4_type); emit_pull_constant_load_reg(dst_reg(packed_consts), surf_index, offset_reg, NULL, NULL /* before_block/inst */); + prog_data->base.has_ubo_pull = true; } else { src_reg temp = src_reg(this, glsl_type::dvec4_type); src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F); @@ -676,6 +698,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr 
*instr) emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u))); emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)), surf_index, offset_reg, NULL, NULL); + prog_data->base.has_ubo_pull = true; packed_consts = src_reg(this, glsl_type::dvec4_type); shuffle_64bit_data(dst_reg(packed_consts), temp, false); _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
