Mesa (main): nir: remove sad_u8x4

GitLab Mirror Fri, 05 Jan 2024 12:01:22 -0800

Module: Mesa
Branch: main
Commit: ae54cbeb3f40abebb8f534c69de620d6a0ca4b2b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae54cbeb3f40abebb8f534c69de620d6a0ca4b2b


Author: Rhys Perry <pendingchao...@gmail.com>
Date:   Mon Nov 20 15:53:39 2023 +0000

nir: remove sad_u8x4

All uses of this can be replaced with msad_4x8.

Signed-off-by: Rhys Perry <pendingchao...@gmail.com>
Reviewed-by: Georg Lehmann <dadschoo...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26907>

---

 src/amd/common/ac_nir_lower_ngg.c                  |  8 ++++----
 src/amd/compiler/aco_instruction_selection.cpp     |  5 -----
 .../compiler/aco_instruction_selection_setup.cpp   |  1 -
 src/amd/llvm/ac_nir_to_llvm.c                      |  5 -----
 src/compiler/nir/nir_opcodes.py                    | 24 ----------------------
 src/compiler/nir/nir_range_analysis.c              |  1 -
 6 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 245b7d27453..987f5b8fed8 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -284,7 +284,7 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
     *
     * If the v_dot instruction can't be used, we left-shift the packed bytes.
     * This will shift out the unneeded bytes and shift in zeroes instead,
-    * then we sum them using v_sad_u8.
+    * then we sum them using v_msad_u8.
     */
 
    nir_def *lane_id = nir_load_subgroup_invocation(b);
@@ -302,7 +302,7 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
          return nir_udot_4x8_uadd(b, packed, dot_op, nir_imm_int(b, 0));
       } else {
          nir_def *sad_op = nir_ishl(b, nir_ishl(b, packed, shift), shift);
-         return nir_sad_u8x4(b, sad_op, nir_imm_int(b, 0), nir_imm_int(b, 0));
+         return nir_msad_4x8(b, sad_op, nir_imm_int(b, 0), nir_imm_int(b, 0));
       }
    } else if (num_lds_dwords == 2) {
      nir_def *dot_op = !use_dot ? NULL : nir_ushr(b, nir_ushr(b, nir_imm_int64(b, 0x0101010101010101), shift), shift);
@@ -317,8 +317,8 @@ summarize_repack(nir_builder *b, nir_def *packed_counts, unsigned num_lds_dwords
          return nir_udot_4x8_uadd(b, packed_dw1, nir_unpack_64_2x32_split_y(b, dot_op), sum);
       } else {
          nir_def *sad_op = nir_ishl(b, nir_ishl(b, nir_pack_64_2x32_split(b, packed_dw0, packed_dw1), shift), shift);
-         nir_def *sum = nir_sad_u8x4(b, nir_unpack_64_2x32_split_x(b, sad_op), nir_imm_int(b, 0), nir_imm_int(b, 0));
-         return nir_sad_u8x4(b, nir_unpack_64_2x32_split_y(b, sad_op), nir_imm_int(b, 0), sum);
+         nir_def *sum = nir_msad_4x8(b, nir_unpack_64_2x32_split_x(b, sad_op), nir_imm_int(b, 0), nir_imm_int(b, 0));
+         return nir_msad_4x8(b, nir_unpack_64_2x32_split_y(b, sad_op), nir_imm_int(b, 0), sum);
       }
    } else {
       unreachable("Unimplemented NGG wave count");
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index be1468f1fd5..1fe6918aa51 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3420,11 +3420,6 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       }
       break;
    }
-   case nir_op_sad_u8x4: {
-      assert(dst.regClass() == v1);
-      emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
-      break;
-   }
    case nir_op_msad_4x8: {
       assert(dst.regClass() == v1);
       emit_vop3a_instruction(ctx, instr, aco_opcode::v_msad_u8, dst, false, 3u, true);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 9004ef2e632..32e9a08a808 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -392,7 +392,6 @@ init_context(isel_context* ctx, nir_shader* shader)
                case nir_op_frexp_sig:
                case nir_op_frexp_exp:
                case nir_op_cube_amd:
-               case nir_op_sad_u8x4:
                case nir_op_msad_4x8:
                case nir_op_udot_4x8_uadd:
                case nir_op_sdot_4x8_iadd:
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index e712e2d6820..06dfaed1a39 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1253,11 +1253,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       break;
    }
 
-   case nir_op_sad_u8x4:
-      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
-                                  (LLVMValueRef[]){src[0], src[1], src[2]}, 3, 0);
-      break;
-
    case nir_op_msad_4x8:
       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.msad.u8", ctx->ac.i32,
                                   (LLVMValueRef[]){src[1], src[0], src[2]}, 3, 0);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 66ce98e46cf..0770351c988 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1126,30 +1126,6 @@ if (bits == 0) {
 }
 """)
 
-triop_horiz("sad_u8x4", 1, 1, 1, 1, """
-uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0;
-uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8;
-uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16;
-uint8_t s0_b3 = (src0.x & 0xff000000) >> 24;
-
-uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0;
-uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8;
-uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16;
-uint8_t s1_b3 = (src1.x & 0xff000000) >> 24;
-
-dst.x = src2.x +
-        (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
-        (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
-        (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
-        (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
-""", description = """
-Sum of absolute differences with accumulation. Equivalent to AMD's v_sad_u8 instruction.
-
-The first two sources contain packed 8-bit unsigned integers, the instruction will
-calculate the absolute difference of these, and then add them together. There is also a
-third source which is a 32-bit unsigned integer and added to the result.
-""")
-
 triop("msad_4x8", tuint32, "", """
 dst = msad(src0, src1, src2);
 """, description = """
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 0ac5638f695..79c7ed92cfd 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1864,7 +1864,6 @@ get_alu_uub(struct analysis_state *state, struct uub_query q, uint32_t *result,
    case nir_op_b2i32:
       *result = 1;
       break;
-   case nir_op_sad_u8x4:
    case nir_op_msad_4x8:
       *result = MIN2((uint64_t)src[2] + 4 * 255, UINT32_MAX);
       break;

Mesa (main): nir: remove sad_u8x4

Reply via email to