https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/140607
None >From 4021541f1a05550bf27348b67f551d4ba73ef9e1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 14 May 2025 08:50:59 +0200 Subject: [PATCH] AMDGPU: Add baseline tests for #139317 --- .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 320 ++++++++++++++ .../AMDGPU/fold-short-64-bit-literals.mir | 392 +++++++++++++++++- ...issue139317-bad-opsel-reg-sequence-fold.ll | 66 +++ .../si-fold-operands-subreg-imm.gfx942.mir | 202 +++++++++ .../AMDGPU/si-fold-operands-subreg-imm.mir | 26 ++ 5 files changed, 1001 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir create mode 100644 llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir new file mode 100644 index 0000000000000..3021761f099fa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir @@ -0,0 +1,320 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] + ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %1:areg_64_align2 = COPY %0 + $agpr0_agpr1 = COPY %1 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] + ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec + %1:areg_64_align2 = COPY %0 + $agpr0_agpr1 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_literal_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_literal_copy_to_areg_64 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] + ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec + %1:areg_64_align2 = COPY %0 + $agpr0_agpr1 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 + ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %1:agpr_32 = COPY %0.sub0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 + ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %1:agpr_32 = COPY %0.sub1 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub0 + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + %1:agpr_32 = COPY %0.sub0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub1 + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + %1:agpr_32 = COPY %0.sub1 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + + +--- +name: v_mov_b64_pseudo_imm_0_copy_to_av_64 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_av_64 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[V_MOV_B]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %1:av_64_align2 = COPY %0 + S_ENDPGM 0, implicit %1 + +... 
+ + +# XXX need f64 use + + + + +--- +name: v_mov_b64_pseudo_imm_0_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp64_1_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, 4607182418800017408, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_lit_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub0_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub0_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = COPY %1.sub0 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub1_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub1_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = COPY %1.sub1 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub0_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub0_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vgpr_32 = COPY %1.sub0 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub1_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub1_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248, implicit $exec + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vgpr_32 = COPY %1.sub1 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir index 1f15066264c74..cfb42de4456f3 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck --check-prefixes=GCN,GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck --check-prefixes=GCN,GFX9 %s --- name: no_fold_fp_64bit_literal_sgpr @@ -41,10 +42,16 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: fold_fp_32bit_literal_sgpr - ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, 4636737291354636288, 0, [[DEF]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]] + ; GFX10-LABEL: name: fold_fp_32bit_literal_sgpr + ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX10-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, 4636737291354636288, 0, [[DEF]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]] + ; + ; GFX9-LABEL: name: fold_fp_32bit_literal_sgpr + ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4636737291354636288 + ; GFX9-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]] %0:vreg_64 = IMPLICIT_DEF %1:sreg_64 = S_MOV_B64_IMM_PSEUDO 4636737291354636288 %2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 
0, implicit $mode, implicit $exec @@ -136,3 +143,378 @@ body: | %2:vreg_64 = V_PK_ADD_F32 0, %0, 0, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN_TO_EPILOG %2 ... + +--- +name: v_mov_b64_pseudo_imm_0_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp64_1_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, 4607182418800017408, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_lit_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_full_copy_to_f64_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_full_copy_to_f64_use + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + ; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vreg_64_align2 = COPY $vgpr0_vgpr1 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vreg_64_align2 = COPY %1 + %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub0_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub0_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = COPY %1.sub0 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub1_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_copy_extract_sub1_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = COPY %1.sub1 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub0_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub0_to_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vgpr_32 = COPY %1.sub0 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub1_to_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GFX10-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub1_to_f32_use + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1072693248, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX10-NEXT: S_ENDPGM 0 + ; + ; GFX9-LABEL: name: v_mov_b64_pseudo_imm_fp64_1_copy_extract_sub1_to_f32_use + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248, implicit $exec + ; GFX9-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX9-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vgpr_32 = COPY %1.sub1 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, 
implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_copy_extract_sub0_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_copy_extract_sub0_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1073741824, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222482165760, implicit $exec + %2:vgpr_32 = COPY %1.sub0 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_copy_extract_sub1_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_copy_extract_sub1_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222482165760, implicit $exec + %2:vgpr_32 = COPY %1.sub1 + %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %2, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_direct_subreg_sub0_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_direct_subreg_sub0_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub0, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_splat_direct_subreg_sub1_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_splat_direct_subreg_sub1_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub1, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_direct_subreg_sub0_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GFX10-LABEL: name: v_mov_b64_pseudo_lit_direct_subreg_sub0_f32_use + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 8888, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX10-NEXT: S_ENDPGM 0 + ; + ; GFX9-LABEL: name: v_mov_b64_pseudo_lit_direct_subreg_sub0_f32_use + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 42945378001592, implicit $exec + ; GFX9-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[V_MOV_B]].sub0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX9-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 42945378001592, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub0, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_direct_subreg_sub1_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GFX10-LABEL: name: v_mov_b64_pseudo_lit_direct_subreg_sub1_f32_use + ; GFX10: liveins: $vgpr0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 9999, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX10-NEXT: S_ENDPGM 0 + ; + ; GFX9-LABEL: name: v_mov_b64_pseudo_lit_direct_subreg_sub1_f32_use + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 42945378001592, implicit $exec + ; GFX9-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[V_MOV_B]].sub1, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GFX9-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 42945378001592, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub1, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_direct_subreg_sub0_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_direct_subreg_sub0_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1073741824, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222482165760, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub0, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_direct_subreg_sub1_f32_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: v_mov_b64_pseudo_imm_fp32_1_fp32_2_direct_subreg_sub1_f32_use + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, 1065353216, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4575657222482165760, implicit $exec + %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %0, 1, %1.sub1, 0, 0, implicit $mode, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll b/llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll
new file mode 100644
index 0000000000000..7d1ea68e63241
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
+
+; Check for correct folding of the constants produced by the
+; stepvector into the fadd. The value should not get lost when folding
+; through subregister extracts of reg_sequence.
+define amdgpu_kernel void @stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38(ptr addrspace(1) readonly captures(none) %.global, ptr addrspace(1) writeonly captures(none) %.global1, i32 %arg2) {
+; GFX942-LABEL: stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38:
+; GFX942: ; %bb.0: ; %bb
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x10
+; GFX942-NEXT: s_mov_b32 s7, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s6, 0
+; GFX942-NEXT: s_cbranch_scc1 .LBB0_3
+; GFX942-NEXT: ; %bb.1: ; %.lr.ph.preheader
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: s_mov_b32 s8, 0x47004600
+; GFX942-NEXT: s_mov_b32 s9, 0x45004400
+; GFX942-NEXT: s_mov_b64 s[4:5], 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT: .LBB0_2: ; %.lr.ph
+; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: global_load_dwordx4 v[4:7], v2, s[2:3]
+; GFX942-NEXT: s_add_u32 s4, s4, 8
+; GFX942-NEXT: s_addc_u32 s5, s5, 0
+; GFX942-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_pk_add_f16 v7, v7, s8
+; GFX942-NEXT: v_pk_add_f16 v6, v6, s9
+; GFX942-NEXT: v_pk_add_f16 v5, v5, 0
+; GFX942-NEXT: v_pk_add_f16 v4, v4, 1.0 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v2, v[4:7], s[0:1]
+; GFX942-NEXT: s_add_u32 s0, s0, 16
+; GFX942-NEXT: s_addc_u32 s1, s1, 0
+; GFX942-NEXT: s_add_u32 s2, s2, 16
+; GFX942-NEXT: s_addc_u32 s3, s3, 0
+; GFX942-NEXT: s_cbranch_vccnz .LBB0_2
+; GFX942-NEXT: .LBB0_3: ; %._crit_edge
+; GFX942-NEXT: s_endpgm
+bb:
+  %i = tail call <8 x i32> @llvm.stepvector.v8i32() ; lanes hold 0, 1, ..., 7
+  %i3 = sitofp <8 x i32> %i to <8 x half> ; loop-invariant addend; as packed f16 pairs: 0x3C000000, 0x42004000, 0x45004400, 0x47004600
+  %i4 = zext i32 %arg2 to i64
+  %.not = icmp eq i32 %arg2, 0
+  br i1 %.not, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %.lr.ph, %bb
+  %i5 = phi i64 [ %i6, %.lr.ph ], [ 0, %bb ]
+  %i6 = add nuw nsw i64 %i5, 8
+  %i7 = getelementptr inbounds nuw half, ptr addrspace(1) %.global1, i64 %i5
+  %i8 = load <8 x half>, ptr addrspace(1) %i7, align 2 ; NOTE(review): this reads through %.global1, which is declared writeonly - attributes look swapped; confirm
+  %i9 = fadd <8 x half> %i8, %i3 ; shows up as the four v_pk_add_f16 above. NOTE(review): the recorded v5 add takes 0 where lanes 2-3 would give 0x42004000 - presumably the #139317 misfold this baseline captures; confirm
+  %i10 = getelementptr inbounds nuw half, ptr addrspace(1) %.global, i64 %i5
+  store <8 x half> %i9, ptr addrspace(1) %i10, align 2 ; NOTE(review): this writes through %.global, which is declared readonly; confirm
+  %i11 = icmp samesign ult i64 %i6, %i4
+  br i1 %i11, label %.lr.ph, label %._crit_edge
+
+._crit_edge: ; preds = %.lr.ph, %bb
+  ret void
+}
+
+declare <8 x i32> @llvm.stepvector.v8i32() #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir
new file mode 100644
index 0000000000000..c1ce216955dca
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir
@@ -0,0 +1,202 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# Test behavior of folding into op_sel operands through reg_sequence
+# with subregister uses
+
+---
+name: issue139317 # src1 of the packed add is sub1 of a REG_SEQUENCE built from two S_MOV_B32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1107312640 ; 0x42004000
+    %2:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %3.sub1 (0x42004000). NOTE(review): the recorded result has immediate 0 here - presumably the #139317 misfold being baselined; confirm
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: issue139317_sub0 # same inputs as issue139317, but src1 reads sub0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_sub0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 4, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1107312640 ; 0x42004000
+    %2:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub0:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %3.sub0 (0x3C000000); the recorded result uses 15360 (0x3C00) with src1 modifiers changed from 8 to 4
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: issue139317_nested_reg_sequence # src1 is a 32-bit subreg of a REG_SEQUENCE whose inputs are themselves REG_SEQUENCEs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_nested_reg_sequence
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1191200256
+    ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1157645312
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_2]], %subreg.sub0, killed [[S_MOV_B32_3]], %subreg.sub1
+    ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, killed [[COPY]], 8, [[REG_SEQUENCE2]].sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1107312640 ; 0x42004000
+    %2:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %3:sreg_32 = S_MOV_B32 1191200256 ; 0x47004600
+    %4:sreg_32 = S_MOV_B32 1157645312 ; 0x45004400
+    %5:sgpr_64 = REG_SEQUENCE killed %2, %subreg.sub0, killed %1, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %6:sgpr_64 = REG_SEQUENCE killed %3, %subreg.sub0, killed %4, %subreg.sub1 ; sub0 = %3, sub1 = %4
+    %7:sgpr_128 = REG_SEQUENCE %5, %subreg.sub0_sub1, %6, %subreg.sub2_sub3 ; sub0_sub1 = %5, sub2_sub3 = %6
+    %8:vgpr_32 = nofpexcept V_PK_ADD_F16 8, killed %0, 8, %7.sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %7.sub1, presumably %5.sub1 = %1 (assumes sub0_sub1 covers sub0 and sub1); the recorded result leaves this operand unfolded
+    S_ENDPGM 0, implicit %8
+
+...
+
+---
+name: issue139317_foldable_neg_imm # as issue139317, but sub1 is defined by S_MOV_B32 -16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_foldable_neg_imm
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 4294967295, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 -16 ; 0xFFFFFFF0 as a 32-bit pattern
+    %2:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %3.sub1 (-16). NOTE(review): the recorded result uses 4294967295 (0xFFFFFFFF) rather than -16; confirm whether that is part of the baselined misfold
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: issue139317_foldable_fp_imm_0 # as issue139317, but sub1 is defined by S_MOV_B32 15360
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_foldable_fp_imm_0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15360
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 15360 ; 0x00003C00
+    %2:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %3.sub1 (0x00003C00). NOTE(review): the recorded result has immediate 0 here; confirm whether that is part of the baselined misfold
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: issue139317_foldable_fp_imm_1 # the two S_MOV_B32 values of fp_imm_0 swapped: sub0 is 15360, sub1 is 1006632960
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_foldable_fp_imm_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 15360
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1006632960 ; 0x3C000000
+    %2:sreg_32 = S_MOV_B32 15360 ; 0x00003C00
+    %3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1 ; sub0 = %2, sub1 = %1
+    %4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %3.sub1 (0x3C000000). NOTE(review): the recorded result has immediate 0 here; confirm whether that is part of the baselined misfold
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: issue139317_reg_sequence_subreg_use # REG_SEQUENCE inputs are the two subregs of a single S_MOV_B64_IMM_PSEUDO
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_reg_sequence_subreg_use
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4755871576254054400
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B]].sub0, %subreg.sub0, [[S_MOV_B]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[REG_SEQUENCE]].sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64_IMM_PSEUDO 4755871576254054400 ; 0x420040003C000000: high 32 bits 0x42004000, low 32 bits 0x3C000000
+    %2:sgpr_64 = REG_SEQUENCE killed %1.sub0, %subreg.sub0, %1.sub1, %subreg.sub1 ; sub0 = %1.sub0, sub1 = %1.sub1
+    %3:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %2.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %2.sub1; the recorded result leaves this operand unfolded
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: issue139317_reg_sequence_subreg_use_foldable_imm # as reg_sequence_subreg_use, with a 64-bit value whose high 32 bits are 15360
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: issue139317_reg_sequence_subreg_use_foldable_imm
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 65971704299520
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B]].sub0, %subreg.sub0, [[S_MOV_B]].sub1, %subreg.sub1
+    ; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64_IMM_PSEUDO 65971704299520 ; 0x00003C003C000000: high 32 bits 0x00003C00 (15360), low 32 bits 0x3C000000
+    %2:sgpr_64 = REG_SEQUENCE killed %1.sub0, %subreg.sub0, %1.sub1, %subreg.sub1 ; sub0 = %1.sub0, sub1 = %1.sub1
+    %3:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %2.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; src1 is %2.sub1; the recorded result replaces it with 15360
+    S_ENDPGM 0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
index 8d6c3efb5ded5..38b4533a14895 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
@@ -149,3 +149,29 @@ body: |
     S_ENDPGM 0, implicit $scc
 
 ...
+
+---
+name: s_mov_b64_copy_subreg_into_reg_sequence_user # COPYs of sub0/sub1 of an S_MOV_B64 feed S_ADD_U32 / S_ADDC_U32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr8, $sgpr9
+
+    ; CHECK-LABEL: name: s_mov_b64_copy_subreg_into_reg_sequence_user
+    ; CHECK: liveins: $sgpr8, $sgpr9
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9
+    ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 16, implicit-def $scc
+    ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY1]], 0, implicit-def $scc, implicit $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
+    %0:sreg_32 = COPY $sgpr8
+    %1:sreg_32 = COPY $sgpr9
+    %2:sreg_64 = S_MOV_B64 16 ; low 32 bits 16, high 32 bits 0
+    %3:sreg_32 = COPY %2.sub0 ; recorded result folds this to 16 in the S_ADD_U32
+    %4:sreg_32 = COPY %2.sub1 ; recorded result folds this to 0 in the S_ADDC_U32
+    %5:sreg_32 = S_ADD_U32 %0, %3, implicit-def $scc
+    %6:sreg_32 = S_ADDC_U32 %1, %4, implicit-def $scc, implicit $scc
+    S_ENDPGM 0, implicit %5, implicit %6
+
+...
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits