Module: Mesa
Branch: main
Commit: 350c56b1c3e2ee087a6e336befe68b2c03a41037
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=350c56b1c3e2ee087a6e336befe68b2c03a41037

Author: Gert Wollny <[email protected]>
Date:   Mon Oct 31 15:08:14 2022 +0100

r600/sfn: lower uniforms to UBOs

Signed-off-by: Gert Wollny <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19416>

---

 src/gallium/drivers/r600/r600_pipe_common.c        |  3 +-
 src/gallium/drivers/r600/sfn/sfn_assembler.cpp     |  1 -
 src/gallium/drivers/r600/sfn/sfn_nir.cpp           | 73 +++++++++++++++++++++-
 .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp       |  2 +-
 src/gallium/drivers/r600/sfn/sfn_shader.cpp        | 11 ++--
 src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp |  4 +-
 src/gallium/drivers/r600/sfn/sfn_virtualvalues.h   |  2 +-
 .../drivers/r600/sfn/tests/sfn_value_test.cpp      |  2 +-
 8 files changed, 85 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 41c0d896816..51b045afbd2 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1364,7 +1364,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
                .linker_ignore_precision = true,
                .lower_fpow = true,
                .lower_int64_options = ~0,
-               .lower_cs_local_index_to_id = true
+               .lower_cs_local_index_to_id = true,
+               .lower_uniforms_to_ubo = true
        };
 
        rscreen->nir_options = nir_options;
diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp
index 6fa49287c63..1df818912c3 100644
--- a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp
@@ -307,7 +307,6 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
 
       if (buffer_offset && kcache_index_mode == bim_none) {
          kcache_index_mode = bim_zero;
-         alu.src[i].kc_bank = 1;
          alu.src[i].kc_rel = 1;
       }
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index fa7d51f67f7..6347644e4dd 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -28,7 +28,9 @@
 
 #include "../r600_pipe.h"
 #include "../r600_shader.h"
+#include "nir.h"
 #include "nir_builder.h"
+#include "nir_intrinsics.h"
 #include "sfn_assembler.h"
 #include "sfn_debug.h"
 #include "sfn_instr_tex.h"
@@ -226,8 +228,7 @@ private:
       auto intr = nir_instr_as_intrinsic(instr);
       nir_ssa_def *output[8] = {nullptr};
 
-      // for UBO loads we correct the buffer ID by adding 1
-      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1);
+      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER);
 
       assert(intr->src[0].is_ssa);
       auto clip_vtx = intr->src[0].ssa;
@@ -270,6 +271,67 @@ private:
    pipe_stream_output_info& m_so_info;
 };
 
+/* lower_uniforms_to_ubo adds a 1 to the UBO buffer ID.
+ * If the buffer ID is a non-constant value we end up
+ * with "iadd bufid, 1", but on r600 we can put that constant
+ * "1" as constant cache ID into the CF instruction and don't need
+ * to execute that extra ADD op, so eliminate the addition here
+ * again and move the buffer base ID into the base value of
+ * the intrinsic that is not used otherwise */
+class OptIndirectUBOLoads : public NirLowerInstruction {
+private:
+   bool filter(const nir_instr *instr) const override
+   {
+      if (instr->type != nir_instr_type_intrinsic)
+         return false;
+
+      auto intr = nir_instr_as_intrinsic(instr);
+      if (intr->intrinsic != nir_intrinsic_load_ubo_vec4)
+         return false;
+
+      if (nir_src_as_const_value(intr->src[0]) != nullptr)
+         return false;
+
+      return nir_intrinsic_base(intr) == 0;
+   }
+
+   nir_ssa_def *lower(nir_instr *instr) override
+   {
+      auto intr = nir_instr_as_intrinsic(instr);
+      assert(intr->intrinsic == nir_intrinsic_load_ubo_vec4);
+      assert(intr->src[0].is_ssa);
+
+      auto parent = intr->src[0].ssa->parent_instr;
+
+      if (parent->type != nir_instr_type_alu)
+         return nullptr;
+
+      auto alu = nir_instr_as_alu(parent);
+
+      if (alu->op != nir_op_iadd)
+         return nullptr;
+
+      int new_base = 0;
+      nir_src *new_bufid = nullptr;
+      auto src0 = nir_src_as_const_value(alu->src[0].src);
+      if (src0) {
+         new_bufid = &alu->src[1].src;
+         new_base = src0->i32;
+      } else if (auto src1 = nir_src_as_const_value(alu->src[1].src)) {
+         new_bufid = &alu->src[0].src;
+         new_base = src1->i32;
+      } else {
+         return nullptr;
+      }
+
+      assert(new_bufid->is_ssa);
+
+      nir_intrinsic_set_base(intr, new_base);
+      nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(new_bufid->ssa));
+      return &intr->dest.ssa;
+   }
+};
+
 } // namespace r600
 
 static nir_intrinsic_op
@@ -558,6 +620,12 @@ r600_lower_fs_pos_input(nir_shader *shader)
                                         nullptr);
 };
 
+bool
+r600_opt_indirect_fbo_loads(nir_shader *shader)
+{
+   return r600::OptIndirectUBOLoads().run(shader);
+}
+
 static bool
 optimize_once(nir_shader *shader)
 {
@@ -788,6 +856,7 @@ r600_shader_from_nir(struct r600_context *rctx,
    }
 
    NIR_PASS_V(sh, nir_lower_ubo_vec4);
+   NIR_PASS_V(sh, r600_opt_indirect_fbo_loads);
 
    if (lower_64bit)
       NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
index dbf5b4212f0..6d7b07ceb57 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -665,7 +665,7 @@ LowerSplit64BitVar::split_double_load_ubo(nir_intrinsic_instr *intr)
    nir_intrinsic_set_range(load2, nir_intrinsic_range(intr));
    nir_intrinsic_set_access(load2, nir_intrinsic_access(intr));
    nir_intrinsic_set_align_mul(load2, nir_intrinsic_align_mul(intr));
-   nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr) + 16);
+   nir_intrinsic_set_align_offset(load2, nir_intrinsic_align_offset(intr));
 
    load2->num_components = second_components;
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
index 538be0c1003..8d6e221cdd1 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
@@ -28,6 +28,7 @@
 
 #include "gallium/drivers/r600/r600_shader.h"
 #include "nir.h"
+#include "nir_intrinsics.h"
 #include "sfn_debug.h"
 #include "sfn_instr.h"
 #include "sfn_instr_alugroup.h"
@@ -1282,6 +1283,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
 {
    auto bufid = nir_src_as_const_value(instr->src[0]);
    auto buf_offset = nir_src_as_const_value(instr->src[1]);
+   auto base_id = nir_intrinsic_base(instr);
 
    if (!buf_offset) {
       /* TODO: if bufid is constant then this can also be solved by using the
@@ -1299,11 +1301,11 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
       LoadFromBuffer *ir;
       if (bufid) {
          ir = new LoadFromBuffer(
-            dest, dest_swz, addr, 0, 1 + bufid->u32, nullptr, fmt_32_32_32_32_float);
+            dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float);
       } else {
          auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
          ir = new LoadFromBuffer(
-            dest, dest_swz, addr, 0, 1, buffer_id, fmt_32_32_32_32_float);
+            dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float);
       }
       emit_instruction(ir);
       return true;
@@ -1323,7 +1325,7 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
                  << " const[" << i << "]: " << instr->const_index[i] << "\n";
 
          auto uniform =
-            value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
+            value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32);
          ir = new AluInstr(op1_mov,
                            value_factory().dest(instr->dest, i, pin),
                            uniform,
@@ -1340,7 +1342,8 @@ Shader::load_ubo(nir_intrinsic_instr *instr)
 
       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
          int cmp = buf_cmp + i;
-         auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id);
+         auto u =
+            new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr));
          auto dest = value_factory().dest(instr->dest, i, pin_none);
          ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
          emit_instruction(ir);
diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
index a21dc87180a..7c6e7d62cb3 100644
--- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp
@@ -731,9 +731,9 @@ UniformValue::UniformValue(int sel, int chan, int kcache_bank):
 {
 }
 
-UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr):
+UniformValue::UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank):
     VirtualValue(sel, chan, pin_none),
-    m_kcache_bank(0),
+    m_kcache_bank(kcache_bank),
     m_buf_addr(buf_addr)
 {
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
index f56a5ac1a93..0ddbe8e9a73 100644
--- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
+++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h
@@ -379,7 +379,7 @@ public:
    using Pointer = R600_POINTER_TYPE(UniformValue);
 
    UniformValue(int sel, int chan, int kcache_bank = 0);
-   UniformValue(int sel, int chan, PVirtualValue buf_addr);
+   UniformValue(int sel, int chan, PVirtualValue buf_addr, int kcache_bank);
 
    void accept(RegisterVisitor& vistor) override;
    void accept(ConstRegisterVisitor& vistor) const override;
diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
index edd2024c16b..bc69faad87b 100644
--- a/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
+++ b/src/gallium/drivers/r600/sfn/tests/sfn_value_test.cpp
@@ -100,7 +100,7 @@ TEST_F(ValueTest, uniform_value)
    auto addr = new Register(1024, 0, pin_none);
    ASSERT_TRUE(addr);
 
-   UniformValue reg_with_buffer_addr(513, 0, addr);
+   UniformValue reg_with_buffer_addr(513, 0, addr, 0);
 
    EXPECT_EQ(reg_with_buffer_addr.sel(), 513);
    EXPECT_EQ(reg_with_buffer_addr.chan(), 0);

Reply via email to