Module: Mesa Branch: main Commit: 4a6ee2c4833b7be8f2ae9e379433fd855a865de3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a6ee2c4833b7be8f2ae9e379433fd855a865de3
Author: Georg Lehmann <dadschoo...@gmail.com> Date: Mon Dec 25 15:32:52 2023 +0100 aco: shrink buffer stores with undef/zero components Buffer stores store 0 like image stores for unspecified components. Foz-DB Navi21: Totals from 91 (0.11% of 79330) affected shaders: Instrs: 63327 -> 63121 (-0.33%) CodeSize: 315312 -> 314440 (-0.28%); split: -0.28%, +0.00% VGPRs: 3144 -> 3120 (-0.76%) Latency: 441424 -> 441300 (-0.03%); split: -0.03%, +0.00% InvThroughput: 65501 -> 65130 (-0.57%) Copies: 6197 -> 5999 (-3.20%) PreVGPRs: 2197 -> 2182 (-0.68%) Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26897> --- src/amd/compiler/aco_instruction_selection.cpp | 91 +++++++++++++------------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 63573876f2e..a7059f38ed4 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6410,50 +6410,6 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) bool glc = ctx->options->gfx_level == GFX6 || ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ctx->program->gfx_level < GFX11); - if (dim == GLSL_SAMPLER_DIM_BUF) { - Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); - Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1); - aco_opcode opcode; - if (!d16) { - switch (num_components) { - case 1: opcode = aco_opcode::buffer_store_format_x; break; - case 2: opcode = aco_opcode::buffer_store_format_xy; break; - case 3: opcode = aco_opcode::buffer_store_format_xyz; break; - case 4: opcode = aco_opcode::buffer_store_format_xyzw; break; - default: unreachable(">4 channel buffer image store"); - } - } else { - switch (num_components) { - case 1: opcode = aco_opcode::buffer_store_format_d16_x; break; - case 2: opcode = aco_opcode::buffer_store_format_d16_xy; break; - case 3: opcode = aco_opcode::buffer_store_format_d16_xyz; break; - case 4: opcode = aco_opcode::buffer_store_format_d16_xyzw; break; - default: unreachable(">4 channel buffer image store"); - } - } - aco_ptr<MUBUF_instruction> store{ - create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)}; - store->operands[0] = Operand(rsrc); - store->operands[1] = Operand(vindex); - store->operands[2] = Operand::c32(0); - store->operands[3] = Operand(data); - store->idxen = true; - store->glc = glc; - store->dlc = false; - store->disable_wqm = true; - store->sync = sync; - ctx->program->needs_exact = true; - ctx->block->instructions.emplace_back(std::move(store)); - return; - } - - assert(data.type() == RegType::vgpr); - std::vector<Temp> coords = get_image_coords(ctx, instr); - Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); - - bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; - aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip; - uint32_t dmask = BITFIELD_MASK(num_components); /* remove zero/undef elements from data, components which aren't in dmask * are zeroed anyway @@ -6469,6 +6425,9 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) /* dmask cannot be 0, at least one vgpr is always read */ if (dmask == 0) dmask = 1; + /* buffer store only supports consecutive components. */ + if (dim == GLSL_SAMPLER_DIM_BUF) + dmask = BITFIELD_MASK(util_last_bit(dmask)); if (dmask != BITFIELD_MASK(num_components)) { uint32_t dmask_count = util_bitcount(dmask); @@ -6489,6 +6448,50 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) } } + if (dim == GLSL_SAMPLER_DIM_BUF) { + Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); + Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1); + aco_opcode opcode; + if (!d16) { + switch (dmask) { + case 0x1: opcode = aco_opcode::buffer_store_format_x; break; + case 0x3: opcode = aco_opcode::buffer_store_format_xy; break; + case 0x7: opcode = aco_opcode::buffer_store_format_xyz; break; + case 0xf: opcode = aco_opcode::buffer_store_format_xyzw; break; + default: unreachable(">4 channel buffer image store"); + } + } else { + switch (dmask) { + case 0x1: opcode = aco_opcode::buffer_store_format_d16_x; break; + case 0x3: opcode = aco_opcode::buffer_store_format_d16_xy; break; + case 0x7: opcode = aco_opcode::buffer_store_format_d16_xyz; break; + case 0xf: opcode = aco_opcode::buffer_store_format_d16_xyzw; break; + default: unreachable(">4 channel buffer image store"); + } + } + aco_ptr<MUBUF_instruction> store{ + create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)}; + store->operands[0] = Operand(rsrc); + store->operands[1] = Operand(vindex); + store->operands[2] = Operand::c32(0); + store->operands[3] = Operand(data); + store->idxen = true; + store->glc = glc; + store->dlc = false; + store->disable_wqm = true; + store->sync = sync; + ctx->program->needs_exact = true; + ctx->block->instructions.emplace_back(std::move(store)); + return; + } + + assert(data.type() == RegType::vgpr); + std::vector<Temp> coords = get_image_coords(ctx, instr); + Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); + + bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; + aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip; + MIMG_instruction* store = emit_mimg(bld, opcode, Temp(0, v1), resource, Operand(s4), coords, Operand(data)); store->glc = glc;