Module: Mesa
Branch: main
Commit: 4a6ee2c4833b7be8f2ae9e379433fd855a865de3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a6ee2c4833b7be8f2ae9e379433fd855a865de3

Author: Georg Lehmann <dadschoo...@gmail.com>
Date:   Mon Dec 25 15:32:52 2023 +0100

aco: shrink buffer stores with undef/zero components

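Like image stores, buffer stores write 0 to components that are not
specified, so undef/zero components can be dropped from the stored data.
Buffer stores only support consecutive components, so only trailing
components are removed.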

Foz-DB Navi21:
Totals from 91 (0.11% of 79330) affected shaders:
Instrs: 63327 -> 63121 (-0.33%)
CodeSize: 315312 -> 314440 (-0.28%); split: -0.28%, +0.00%
VGPRs: 3144 -> 3120 (-0.76%)
Latency: 441424 -> 441300 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 65501 -> 65130 (-0.57%)
Copies: 6197 -> 5999 (-3.20%)
PreVGPRs: 2197 -> 2182 (-0.68%)

Reviewed-by: Daniel Schürmann <dan...@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26897>

---

 src/amd/compiler/aco_instruction_selection.cpp | 91 +++++++++++++-------------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 63573876f2e..a7059f38ed4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6410,50 +6410,6 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
    bool glc = ctx->options->gfx_level == GFX6 ||
               ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && 
ctx->program->gfx_level < GFX11);
 
-   if (dim == GLSL_SAMPLER_DIM_BUF) {
-      Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
-      Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, 
instr->src[1].ssa), 0, v1);
-      aco_opcode opcode;
-      if (!d16) {
-         switch (num_components) {
-         case 1: opcode = aco_opcode::buffer_store_format_x; break;
-         case 2: opcode = aco_opcode::buffer_store_format_xy; break;
-         case 3: opcode = aco_opcode::buffer_store_format_xyz; break;
-         case 4: opcode = aco_opcode::buffer_store_format_xyzw; break;
-         default: unreachable(">4 channel buffer image store");
-         }
-      } else {
-         switch (num_components) {
-         case 1: opcode = aco_opcode::buffer_store_format_d16_x; break;
-         case 2: opcode = aco_opcode::buffer_store_format_d16_xy; break;
-         case 3: opcode = aco_opcode::buffer_store_format_d16_xyz; break;
-         case 4: opcode = aco_opcode::buffer_store_format_d16_xyzw; break;
-         default: unreachable(">4 channel buffer image store");
-         }
-      }
-      aco_ptr<MUBUF_instruction> store{
-         create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
-      store->operands[0] = Operand(rsrc);
-      store->operands[1] = Operand(vindex);
-      store->operands[2] = Operand::c32(0);
-      store->operands[3] = Operand(data);
-      store->idxen = true;
-      store->glc = glc;
-      store->dlc = false;
-      store->disable_wqm = true;
-      store->sync = sync;
-      ctx->program->needs_exact = true;
-      ctx->block->instructions.emplace_back(std::move(store));
-      return;
-   }
-
-   assert(data.type() == RegType::vgpr);
-   std::vector<Temp> coords = get_image_coords(ctx, instr);
-   Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
-
-   bool level_zero = nir_src_is_const(instr->src[4]) && 
nir_src_as_uint(instr->src[4]) == 0;
-   aco_opcode opcode = level_zero ? aco_opcode::image_store : 
aco_opcode::image_store_mip;
-
    uint32_t dmask = BITFIELD_MASK(num_components);
    /* remove zero/undef elements from data, components which aren't in dmask
     * are zeroed anyway
@@ -6469,6 +6425,9 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
       /* dmask cannot be 0, at least one vgpr is always read */
       if (dmask == 0)
          dmask = 1;
+      /* buffer store only supports consecutive components. */
+      if (dim == GLSL_SAMPLER_DIM_BUF)
+         dmask = BITFIELD_MASK(util_last_bit(dmask));
 
       if (dmask != BITFIELD_MASK(num_components)) {
          uint32_t dmask_count = util_bitcount(dmask);
@@ -6489,6 +6448,50 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
       }
    }
 
+   if (dim == GLSL_SAMPLER_DIM_BUF) {
+      Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
+      Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, 
instr->src[1].ssa), 0, v1);
+      aco_opcode opcode;
+      if (!d16) {
+         switch (dmask) {
+         case 0x1: opcode = aco_opcode::buffer_store_format_x; break;
+         case 0x3: opcode = aco_opcode::buffer_store_format_xy; break;
+         case 0x7: opcode = aco_opcode::buffer_store_format_xyz; break;
+         case 0xf: opcode = aco_opcode::buffer_store_format_xyzw; break;
+         default: unreachable(">4 channel buffer image store");
+         }
+      } else {
+         switch (dmask) {
+         case 0x1: opcode = aco_opcode::buffer_store_format_d16_x; break;
+         case 0x3: opcode = aco_opcode::buffer_store_format_d16_xy; break;
+         case 0x7: opcode = aco_opcode::buffer_store_format_d16_xyz; break;
+         case 0xf: opcode = aco_opcode::buffer_store_format_d16_xyzw; break;
+         default: unreachable(">4 channel buffer image store");
+         }
+      }
+      aco_ptr<MUBUF_instruction> store{
+         create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
+      store->operands[0] = Operand(rsrc);
+      store->operands[1] = Operand(vindex);
+      store->operands[2] = Operand::c32(0);
+      store->operands[3] = Operand(data);
+      store->idxen = true;
+      store->glc = glc;
+      store->dlc = false;
+      store->disable_wqm = true;
+      store->sync = sync;
+      ctx->program->needs_exact = true;
+      ctx->block->instructions.emplace_back(std::move(store));
+      return;
+   }
+
+   assert(data.type() == RegType::vgpr);
+   std::vector<Temp> coords = get_image_coords(ctx, instr);
+   Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
+
+   bool level_zero = nir_src_is_const(instr->src[4]) && 
nir_src_as_uint(instr->src[4]) == 0;
+   aco_opcode opcode = level_zero ? aco_opcode::image_store : 
aco_opcode::image_store_mip;
+
    MIMG_instruction* store =
       emit_mimg(bld, opcode, Temp(0, v1), resource, Operand(s4), coords, 
Operand(data));
    store->glc = glc;

Reply via email to