Module: Mesa Branch: main Commit: 350c56b1c3e2ee087a6e336befe68b2c03a41037 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=350c56b1c3e2ee087a6e336befe68b2c03a41037
Author: Gert Wollny <[email protected]> Date: Mon Oct 31 15:08:14 2022 +0100 r600/sfn: lower uniforms to UBOs Signed-off-by: Gert Wollny <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19416> --- src/gallium/drivers/r600/r600_pipe_common.c | 3 +- src/gallium/drivers/r600/sfn/sfn_assembler.cpp | 1 - src/gallium/drivers/r600/sfn/sfn_nir.cpp | 73 +++++++++++++++++++++- .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp | 2 +- src/gallium/drivers/r600/sfn/sfn_shader.cpp | 11 ++-- src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp | 4 +- src/gallium/drivers/r600/sfn/sfn_virtualvalues.h | 2 +- .../drivers/r600/sfn/tests/sfn_value_test.cpp | 2 +- 8 files changed, 85 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 41c0d896816..51b045afbd2 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1364,7 +1364,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, .linker_ignore_precision = true, .lower_fpow = true, .lower_int64_options = ~0, - .lower_cs_local_index_to_id = true + .lower_cs_local_index_to_id = true, + .lower_uniforms_to_ubo = true }; rscreen->nir_options = nir_options; diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp index 6fa49287c63..1df818912c3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp @@ -307,7 +307,6 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai) if (buffer_offset && kcache_index_mode == bim_none) { kcache_index_mode = bim_zero; - alu.src[i].kc_bank = 1; alu.src[i].kc_rel = 1; } diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index fa7d51f67f7..6347644e4dd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -28,7 +28,9 @@ 
#include "../r600_pipe.h" #include "../r600_shader.h" +#include "nir.h" #include "nir_builder.h" +#include "nir_intrinsics.h" #include "sfn_assembler.h" #include "sfn_debug.h" #include "sfn_instr_tex.h" @@ -226,8 +228,7 @@ private: auto intr = nir_instr_as_intrinsic(instr); nir_ssa_def *output[8] = {nullptr}; - // for UBO loads we correct the buffer ID by adding 1 - auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1); + auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER); assert(intr->src[0].is_ssa); auto clip_vtx = intr->src[0].ssa; @@ -270,6 +271,67 @@ private: pipe_stream_output_info& m_so_info; }; +/* lower_uniforms_to_ubo adds a 1 to the UBO buffer ID. + * If the buffer ID is a non-constant value we end up + * with "iadd bufid, 1", but on r600 we can put that constant + * "1" as constant cache ID into the CF instruction and don't need + * to execute that extra ADD op, so eliminate the addition here + * again and move the buffer base ID into the base value of + * the intrinsic that is not used otherwise */ +class OptIndirectUBOLoads : public NirLowerInstruction { +private: + bool filter(const nir_instr *instr) const override + { + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_ubo_vec4) + return false; + + if (nir_src_as_const_value(intr->src[0]) != nullptr) + return false; + + return nir_intrinsic_base(intr) == 0; + } + + nir_ssa_def *lower(nir_instr *instr) override + { + auto intr = nir_instr_as_intrinsic(instr); + assert(intr->intrinsic == nir_intrinsic_load_ubo_vec4); + assert(intr->src[0].is_ssa); + + auto parent = intr->src[0].ssa->parent_instr; + + if (parent->type != nir_instr_type_alu) + return nullptr; + + auto alu = nir_instr_as_alu(parent); + + if (alu->op != nir_op_iadd) + return nullptr; + + int new_base = 0; + nir_src *new_bufid = nullptr; + auto src0 = nir_src_as_const_value(alu->src[0].src); + if (src0) { + new_bufid = 
&alu->src[1].src; + new_base = src0->i32; + } else if (auto src1 = nir_src_as_const_value(alu->src[1].src)) { + new_bufid = &alu->src[0].src; + new_base = src1->i32; + } else { + return nullptr; + } + + assert(new_bufid->is_ssa); + + nir_intrinsic_set_base(intr, new_base); + nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(new_bufid->ssa)); + return &intr->dest.ssa; + } +}; + } // namespace r600 static nir_intrinsic_op @@ -558,6 +620,12 @@ r600_lower_fs_pos_input(nir_shader *shader) nullptr); }; +bool +r600_opt_indirect_fbo_loads(nir_shader *shader) +{ + return r600::OptIndirectUBOLoads().run(shader); +} + static bool optimize_once(nir_shader *shader) { @@ -788,6 +856,7 @@ r600_shader_from_nir(struct r600_context *rctx, } NIR_PASS_V(sh, nir_lower_ubo_vec4); + NIR_PASS_V(sh, r600_opt_indirect_fbo_loads); if (lower_64bit) NIR_PASS_V(sh, r600::r600_nir_64_to_vec2); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp index dbf5b4212f0..6d7b07ceb57 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp @@ -665,7 +665,7 @@ LowerSplit64BitVar::split_double_load_ubo(nir_intrinsic_instr *intr) nir_intrinsic_set_range(load2, nir_intrinsic_range(intr)); nir_intrinsic_set_access(load2, nir_intrinsic_access(intr)); nir_intrinsic_set_align_mul(load2, nir_intrinsic_align_mul(intr)); - nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr) + 16); + nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr)); load2->num_components = second_components; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 538be0c1003..8d6e221cdd1 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -28,6 +28,7 @@ #include "gallium/drivers/r600/r600_shader.h" #include "nir.h" +#include "nir_intrinsics.h" #include 
"sfn_debug.h" #include "sfn_instr.h" #include "sfn_instr_alugroup.h" @@ -1282,6 +1283,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr) { auto bufid = nir_src_as_const_value(instr->src[0]); auto buf_offset = nir_src_as_const_value(instr->src[1]); + auto base_id = nir_intrinsic_base(instr); if (!buf_offset) { /* TODO: if bufid is constant then this can also be solved by using the @@ -1299,11 +1301,11 @@ Shader::load_ubo(nir_intrinsic_instr *instr) LoadFromBuffer *ir; if (bufid) { ir = new LoadFromBuffer( - dest, dest_swz, addr, 0, 1 + bufid->u32, nullptr, fmt_32_32_32_32_float); + dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float); } else { auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0)); ir = new LoadFromBuffer( - dest, dest_swz, addr, 0, 1, buffer_id, fmt_32_32_32_32_float); + dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float); } emit_instruction(ir); return true; @@ -1323,7 +1325,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr) << " const[" << i << "]: " << instr->const_index[i] << "\n"; auto uniform = - value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1); + value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32); ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin), uniform, @@ -1340,7 +1342,8 @@ Shader::load_ubo(nir_intrinsic_instr *instr) for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { int cmp = buf_cmp + i; - auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id); + auto u = + new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr)); auto dest = value_factory().dest(instr->dest, i, pin_none); ir = new AluInstr(op1_mov, dest, u, AluInstr::write); emit_instruction(ir); diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp index a21dc87180a..7c6e7d62cb3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp +++ 
b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp @@ -731,9 +731,9 @@ UniformValue::UniformValue(int sel, int chan, int kcache_bank): { } -UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr): +UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank): VirtualValue(sel, chan, pin_none), - m_kcache_bank(0), + m_kcache_bank(kcache_bank), m_buf_addr(buf_addr) { } diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h index f56a5ac1a93..0ddbe8e9a73 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h @@ -379,7 +379,7 @@ public: using Pointer = R600_POINTER_TYPE(UniformValue); UniformValue(int sel, int chan, int kcache_bank = 0); - UniformValue(int sel, int chan, PVirtualValue buf_addr); + UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank); void accept(RegisterVisitor& vistor) override; void accept(ConstRegisterVisitor& vistor) const override; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp index edd2024c16b..bc69faad87b 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp @@ -100,7 +100,7 @@ TEST_F(ValueTest, uniform_value) auto addr = new Register(1024, 0, pin_none); ASSERT_TRUE(addr); - UniformValue reg_with_buffer_addr(513, 0, addr); + UniformValue reg_with_buffer_addr(513, 0, addr, 0); EXPECT_EQ(reg_with_buffer_addr.sel(), 513); EXPECT_EQ(reg_with_buffer_addr.chan(), 0);
