Module: Mesa
Branch: master
Commit: 2f125908b358c2423aa34d2673ec04aa546b971a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f125908b358c2423aa34d2673ec04aa546b971a

Author: Daniel Schürmann <[email protected]>
Date:   Fri Sep 18 17:48:36 2020 +0100

radv,aco: lower_pack_half_2x16

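This patch also optimizes pack_half_2x16(a, 0.0): when the second operand
is a constant zero, GFX6-GFX9 emit a single v_cvt_f16_f32, because that
instruction already leaves the upper bits of the result zero on those chips.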

Totals from 1949 (1.43% of 136546) affected shaders (RAVEN):
SGPRs: 83376 -> 83336 (-0.05%)
CodeSize: 3532144 -> 3512352 (-0.56%)
Instrs: 660746 -> 660682 (-0.01%); split: -0.01%, +0.00%
Cycles: 6780716 -> 6780472 (-0.00%); split: -0.00%, +0.00%
VMEM: 990886 -> 990883 (-0.00%); split: +0.00%, -0.00%
SMEM: 150506 -> 150538 (+0.02%); split: +0.05%, -0.03%
SClause: 30595 -> 30594 (-0.00%); split: -0.01%, +0.00%
Copies: 40801 -> 40729 (-0.18%)
PreSGPRs: 52335 -> 52341 (+0.01%); split: -0.03%, +0.04%
PreVGPRs: 45104 -> 45097 (-0.02%)

Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6777>

---

 src/amd/compiler/aco_instruction_selection.cpp       | 19 +++++++++----------
 src/amd/compiler/aco_instruction_selection_setup.cpp |  2 +-
 src/amd/vulkan/radv_shader.c                         |  1 +
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f26947a6969..fb989eb9cd4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2608,18 +2608,17 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       }
       break;
    }
-   case nir_op_pack_half_2x16: {
-      Temp src = get_alu_src(ctx, instr->src[0], 2);
-
+   case nir_op_pack_half_2x16_split: {
       if (dst.regClass() == v1) {
-         Temp src0 = bld.tmp(v1);
-         Temp src1 = bld.tmp(v1);
-         bld.pseudo(aco_opcode::p_split_vector, Definition(src0), 
Definition(src1), src);
-         if (!ctx->block->fp_mode.care_about_round16_64 || 
ctx->block->fp_mode.round16_64 == fp_round_tz) {
-            bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, 
src1);
+         nir_const_value* val = nir_src_as_const_value(instr->src[1].src);
+         if (val && val->u32 == 0 && ctx->program->chip_class <= GFX9) {
+            /* upper bits zero on GFX6-GFX9 */
+            bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), 
get_alu_src(ctx, instr->src[0]));
+         } else if (!ctx->block->fp_mode.care_about_round16_64 || 
ctx->block->fp_mode.round16_64 == fp_round_tz) {
+            emit_vop3a_instruction(ctx, instr, 
aco_opcode::v_cvt_pkrtz_f16_f32, dst);
          } else {
-            src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src0);
-            src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src1);
+            Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), 
get_alu_src(ctx, instr->src[0]));
+            Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), 
get_alu_src(ctx, instr->src[1]));
             bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, 
src1);
          }
       } else {
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index e5b70920307..9c2c5673bb5 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -837,7 +837,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
                   case nir_op_i2f16:
                   case nir_op_i2f32:
                   case nir_op_i2f64:
-                  case nir_op_pack_half_2x16:
+                  case nir_op_pack_half_2x16_split:
                   case nir_op_unpack_half_2x16_split_x:
                   case nir_op_unpack_half_2x16_split_y:
                   case nir_op_fddx:
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 0c328d8a0df..9d25b4f0b89 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -60,6 +60,7 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_pack_snorm_4x8 = true,
        .lower_pack_unorm_2x16 = true,
        .lower_pack_unorm_4x8 = true,
+       .lower_pack_half_2x16 = true,
        .lower_unpack_snorm_2x16 = true,
        .lower_unpack_snorm_4x8 = true,
        .lower_unpack_unorm_2x16 = true,

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to