llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) <details> <summary>Changes</summary> This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. Marking the GEPs in these tests inbounds now means that the tests' expected output won't change with the later patch. This is basically a retry of the very similar PR #<!-- -->131994, as part of an updated stack of PRs. For SWDEV-516125. --- Patch is 476.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165426.diff 21 Files Affected: - (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (+42-42) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+50-50) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+44-44) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+44-44) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+44-44) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+143-143) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+100-100) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+123-123) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll (+123-123) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (+100-100) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+60-60) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll (+60-60) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+60-60) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+60-60) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+59-59) - (modified) 
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+56-56) - (modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+25-25) - (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1) ``````````diff diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index d89b39348ad9a..0310b7e788ddf 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec ; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec @@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.71(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, 
$sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec + ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec ; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec @@ -1007,12 +1007,12 @@ bb: %i11 = icmp eq i32 %i, 0 %i12 = load i32, ptr addrspace(3) null, align 8 %i13 = zext i32 %i12 to i64 - %i14 = getelementptr i32, ptr 
addrspace(1) %arg, i64 %i13 + %i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13 br i1 %arg3, label %bb15, label %bb103 bb15: %i16 = zext i32 %i to i64 - %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16 + %i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16 %i18 = ptrtoint ptr addrspace(1) %i17 to i64 br i1 %arg4, label %bb19, label %bb20 @@ -1021,7 +1021,7 @@ bb19: unreachable bb20: - %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256 + %i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256 %i22 = ptrtoint ptr addrspace(1) %i21 to i64 %i23 = inttoptr i64 %i22 to ptr %i24 = load i8, ptr %i23, align 1 @@ -1033,7 +1033,7 @@ bb26: unreachable bb27: - %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512 + %i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512 %i29 = ptrtoint ptr addrspace(1) %i28 to i64 %i30 = inttoptr i64 %i29 to ptr %i31 = load i8, ptr %i30, align 1 @@ -1045,7 +1045,7 @@ bb33: unreachable bb34: - %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768 + %i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768 %i36 = ptrtoint ptr addrspace(1) %i35 to i64 %i37 = inttoptr i64 %i36 to ptr %i38 = load i8, ptr %i37, align 1 @@ -1057,7 +1057,7 @@ bb40: unreachable bb41: - %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024 + %i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024 %i43 = ptrtoint ptr addrspace(1) %i42 to i64 %i44 = inttoptr i64 %i43 to ptr %i45 = load i8, ptr %i44, align 1 @@ -1069,7 +1069,7 @@ bb47: unreachable bb48: - %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280 + %i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280 %i50 = ptrtoint ptr addrspace(1) %i49 to i64 %i51 = inttoptr i64 %i50 to ptr %i52 = load i8, ptr %i51, align 1 @@ -1081,7 +1081,7 @@ bb54: unreachable bb55: - %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536 + %i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536 %i57 = ptrtoint ptr 
addrspace(1) %i56 to i64 %i58 = or i64 %i57, 1 %i59 = inttoptr i64 %i58 to ptr @@ -1113,7 +1113,7 @@ bb67: bb68: %i69 = zext i1 %arg5 to i8 - %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16 + %i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16 %i71 = ptrtoint ptr addrspace(1) %i70 to i64 br i1 %arg5, label %bb72, label %bb73 @@ -1122,7 +1122,7 @@ bb72: unreachable bb73: - %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256 + %i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256 %i75 = ptrtoint ptr addrspace(1) %i74 to i64 %i76 = inttoptr i64 %i75 to ptr %i77 = load i8, ptr %i76, align 1 @@ -1134,7 +1134,7 @@ bb79: unreachable bb80: - %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512 + %i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512 %i82 = ptrtoint ptr addrspace(1) %i81 to i64 %i83 = or i64 %i82, 1 br i1 %arg6, label %bb84, label %bb85 @@ -1269,7 +1269,7 @@ bb174: %i182 = select i1 %arg3, i32 %i181, i32 0 %i183 = or i32 %i182, %i154 %i184 = or i32 %i183, %i156 - %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13 + %i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13 br i1 %arg3, label %bb186, label %bb196 bb186: diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll index 890f4f77ed107..e509d7b2b9b1b 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -12,8 +12,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; 
OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX7: if: @@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX8: if: @@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX9-NEXT: entry: -; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4 ; OPT-GFX9-NEXT: br label [[ENDIF]] ; OPT-GFX9: endif: @@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = 
getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX10: if: -; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28 +; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4 ; OPT-GFX10-NEXT: br label [[ENDIF]] ; OPT-GFX10: endif: @@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = getelementptr inbounds i32, ptr %in, i64 7 %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -167,12 +167,12 @@ done: define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX7: if: ; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX7-NEXT: br label [[ENDIF]] ; OPT-GFX7: endif: @@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; 
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX8: if: @@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX9-NEXT: entry: -; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: ; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX9-NEXT: br label [[ENDIF]] ; OPT-GFX9: endif: @@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label 
[[IF:%.*]] ; OPT-GFX10: if: ; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX10-NEXT: br label [[ENDIF]] ; OPT-GFX10: endif: @@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = getelementptr inbounds i32, ptr %in, i64 7 %cast = addrspacecast ptr %in.gep to ptr addrspace(1) %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -325,12 +325,12 @@ done: define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( ; OPT-NEXT: entry: -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT: if: ; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4) -; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28 +; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28 ; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4 ; OPT-NEXT: br label [[ENDIF]] ; OPT: endif: @@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] 
entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = getelementptr inbounds i32, ptr %in, i64 7 %cast = addrspacecast ptr %in.gep to ptr addrspace(4) %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -438,8 +438,8 @@ done: define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX9-NEXT: entry: -; 
OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 ; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1 ; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32 ; OPT-GFX9-NEXT: br label [[ENDIF]] @@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i32 1024 - %in.gep = getelementptr i8, ptr %in, i64 4095 + %out.gep = getelementptr inbounds i32, ptr %out, i32 1024 + %in.gep = getelementptr inbounds i8, ptr %in, i64 4095 %tid = cal... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/165426 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
