Module: Mesa
Branch: main
Commit: 3c59df73185852516c6ebcd9f62ee2cad07d0eb4
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c59df73185852516c6ebcd9f62ee2cad07d0eb4

Author: Qiang Yu <[email protected]>
Date:   Wed Apr 26 14:58:58 2023 +0800

aco: get scratch addr from symbol for radeonsi

Reviewed-by: Rhys Perry <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22727>

---

 src/amd/compiler/aco_instruction_selection.cpp | 40 ++++++++++++++++----------
 src/amd/compiler/aco_lower_to_hw_instr.cpp     |  9 +++++-
 src/amd/compiler/aco_shader_info.h             |  2 ++
 src/amd/compiler/aco_spill.cpp                 | 10 ++++++-
 src/amd/compiler/aco_validate.cpp              |  3 +-
 5 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4e3990e4fb6..c9b78c520d6 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7519,9 +7519,16 @@ get_scratch_resource(isel_context* ctx)
 {
    Builder bld(ctx->program, ctx->block);
    Temp scratch_addr = ctx->program->private_segment_buffer;
-   if (ctx->stage.hw != HWStage::CS)
+   if (!scratch_addr.bytes()) {
+      Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+                              Operand::c32(aco_symbol_scratch_addr_lo));
+      Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+                              Operand::c32(aco_symbol_scratch_addr_hi));
+      scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
+   } else if (ctx->stage.hw != HWStage::CS) {
       scratch_addr =
          bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
+   }
 
    uint32_t rsrc_conf =
       S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2);
@@ -11138,22 +11145,25 @@ add_startpgm(struct isel_context* ctx)
       }
    }
 
-   if (ctx->args->ring_offsets.used) {
-      if (ctx->program->gfx_level < GFX9) {
-         /* Stash these in the program so that they can be accessed later when
-          * handling spilling.
-          */
+   if (ctx->program->gfx_level < GFX9) {
+      /* Stash these in the program so that they can be accessed later when
+       * handling spilling.
+       */
+      if (ctx->args->ring_offsets.used)
          ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
-         ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
 
-      } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
-         /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
-         Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
-         scratch_offset.setLateKill(true);
-         Builder bld(ctx->program, ctx->block);
-         bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
-                    get_arg(ctx, ctx->args->ring_offsets), scratch_offset);
-      }
+      ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
+   } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
+      /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
+      Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
+      scratch_offset.setLateKill(true);
+
+      Operand scratch_addr = ctx->args->ring_offsets.used ?
+         Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
+
+      Builder bld(ctx->program, ctx->block);
+      bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
+                 scratch_addr, scratch_offset);
    }
 
    return startpgm;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 19e8ec70609..a008cc04d7d 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2489,7 +2489,14 @@ lower_to_hw_instr(Program* program)
                   break;
 
                Operand scratch_addr = instr->operands[0];
-               if (program->stage.hw != HWStage::CS) {
+               if (scratch_addr.isUndefined()) {
+                  PhysReg reg = instr->definitions[0].physReg();
+                  bld.sop1(aco_opcode::p_load_symbol, Definition(reg, s1),
+                           Operand::c32(aco_symbol_scratch_addr_lo));
+                  bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
+                           Operand::c32(aco_symbol_scratch_addr_hi));
+                  scratch_addr.setFixed(reg);
+               } else if (program->stage.hw != HWStage::CS) {
                   bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
                            Operand::zero());
                   scratch_addr.setFixed(instr->definitions[0].physReg());
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 0d1dcd96392..af741d1d840 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -162,6 +162,8 @@ enum aco_statistic {
 
 enum aco_symbol_id {
    aco_symbol_invalid,
+   aco_symbol_scratch_addr_lo,
+   aco_symbol_scratch_addr_hi,
 };
 
 struct aco_symbol {
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index d2cbca24405..3f8426921ae 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1416,9 +1416,17 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
       return bld.copy(bld.def(s1), Operand::c32(offset));
 
    Temp private_segment_buffer = ctx.program->private_segment_buffer;
-   if (ctx.program->stage.hw != HWStage::CS)
+   if (!private_segment_buffer.bytes()) {
+      Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+                              Operand::c32(aco_symbol_scratch_addr_lo));
+      Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+                              Operand::c32(aco_symbol_scratch_addr_hi));
+      private_segment_buffer =
+         bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
+   } else if (ctx.program->stage.hw != HWStage::CS) {
       private_segment_buffer =
          bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
+   }
 
    if (offset)
       scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index abf0a592591..d4073296ae9 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -270,7 +270,8 @@ validate_ir(Program* program)
                                    (instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
                                    (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
                                    ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
-                                   (instr->isScratch() && i == 0);
+                                   (instr->isScratch() && i == 0) ||
+                                   (instr->opcode == aco_opcode::p_init_scratch && i == 0);
                check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
             } else {
                check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||

Reply via email to