llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

<details>
<summary>Changes</summary>

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds. Marking the GEPs inbounds here
means that their output won't change with the later patch.

Basically a retry of the very similar PR #<!-- -->131994, as part of an updated 
stack
of PRs.

For SWDEV-516125.

---

Patch is 476.80 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/165426.diff


21 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll 
(+14-14) 
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (+42-42) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+50-50) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+44-44) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+44-44) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+44-44) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+143-143) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+100-100) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+123-123) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll (+123-123) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll 
(+100-100) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (+14-14) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (+14-14) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+60-60) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll (+60-60) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll 
(+60-60) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+60-60) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll 
(+59-59) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll 
(+56-56) 
- (modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+25-25) 
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll 
b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index d89b39348ad9a..0310b7e788ddf 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr 
addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   successors: %bb.35(0x40000000), %bb.5(0x40000000)
   ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, 
$vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, 
$sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, 
$sgpr56_sgpr57:0x000000000000000F, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, 
$sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, 
$vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, 
implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, 
$vgpr4_vgpr5, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 
$sgpr24, $vgpr0, 0, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 
killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
@@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr 
addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   successors: %bb.71(0x80000000)
   ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, 
$vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, 
$sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, 
$sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, 
$sgpr64_sgpr65, $sgpr66_sgpr67, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, 
$sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, 
$vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, 
$vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, 
$vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, 
$vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, 
$vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, 
$vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, 
$vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, 
$vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, 
$vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, 
$vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, 
$vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, 
$vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, 
$vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, 
$sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed 
$vgpr4_vgpr5, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed 
$vgpr4_vgpr5, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed 
$sgpr26, $vgpr4, 0, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 
killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
@@ -1007,12 +1007,12 @@ bb:
   %i11 = icmp eq i32 %i, 0
   %i12 = load i32, ptr addrspace(3) null, align 8
   %i13 = zext i32 %i12 to i64
-  %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13
+  %i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
   br i1 %arg3, label %bb15, label %bb103
 
 bb15:
   %i16 = zext i32 %i to i64
-  %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16
+  %i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16
   %i18 = ptrtoint ptr addrspace(1) %i17 to i64
   br i1 %arg4, label %bb19, label %bb20
 
@@ -1021,7 +1021,7 @@ bb19:
   unreachable
 
 bb20:
-  %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256
+  %i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256
   %i22 = ptrtoint ptr addrspace(1) %i21 to i64
   %i23 = inttoptr i64 %i22 to ptr
   %i24 = load i8, ptr %i23, align 1
@@ -1033,7 +1033,7 @@ bb26:
   unreachable
 
 bb27:
-  %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512
+  %i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512
   %i29 = ptrtoint ptr addrspace(1) %i28 to i64
   %i30 = inttoptr i64 %i29 to ptr
   %i31 = load i8, ptr %i30, align 1
@@ -1045,7 +1045,7 @@ bb33:
   unreachable
 
 bb34:
-  %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768
+  %i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768
   %i36 = ptrtoint ptr addrspace(1) %i35 to i64
   %i37 = inttoptr i64 %i36 to ptr
   %i38 = load i8, ptr %i37, align 1
@@ -1057,7 +1057,7 @@ bb40:
   unreachable
 
 bb41:
-  %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024
+  %i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024
   %i43 = ptrtoint ptr addrspace(1) %i42 to i64
   %i44 = inttoptr i64 %i43 to ptr
   %i45 = load i8, ptr %i44, align 1
@@ -1069,7 +1069,7 @@ bb47:
   unreachable
 
 bb48:
-  %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280
+  %i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280
   %i50 = ptrtoint ptr addrspace(1) %i49 to i64
   %i51 = inttoptr i64 %i50 to ptr
   %i52 = load i8, ptr %i51, align 1
@@ -1081,7 +1081,7 @@ bb54:
   unreachable
 
 bb55:
-  %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536
+  %i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536
   %i57 = ptrtoint ptr addrspace(1) %i56 to i64
   %i58 = or i64 %i57, 1
   %i59 = inttoptr i64 %i58 to ptr
@@ -1113,7 +1113,7 @@ bb67:
 
 bb68:
   %i69 = zext i1 %arg5 to i8
-  %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16
+  %i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16
   %i71 = ptrtoint ptr addrspace(1) %i70 to i64
   br i1 %arg5, label %bb72, label %bb73
 
@@ -1122,7 +1122,7 @@ bb72:
   unreachable
 
 bb73:
-  %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256
+  %i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256
   %i75 = ptrtoint ptr addrspace(1) %i74 to i64
   %i76 = inttoptr i64 %i75 to ptr
   %i77 = load i8, ptr %i76, align 1
@@ -1134,7 +1134,7 @@ bb79:
   unreachable
 
 bb80:
-  %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512
+  %i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512
   %i82 = ptrtoint ptr addrspace(1) %i81 to i64
   %i83 = or i64 %i82, 1
   br i1 %arg6, label %bb84, label %bb85
@@ -1269,7 +1269,7 @@ bb174:
   %i182 = select i1 %arg3, i32 %i181, i32 0
   %i183 = or i32 %i182, %i154
   %i184 = or i32 %i183, %i156
-  %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13
+  %i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13
   br i1 %arg3, label %bb186, label %bb196
 
 bb186:
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll 
b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 890f4f77ed107..e509d7b2b9b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -12,8 +12,8 @@
 define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) 
{
 ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[IN:%.*]], i64 7
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
@@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, 
ptr %in, i32 %cond) {
 ;
 ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, 
ptr %in, i32 %cond) {
 ;
 ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, 
ptr %in, i32 %cond) {
 ;
 ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 
28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, 
ptr %in, i32 %cond) {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
 
@@ -167,12 +167,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr 
%in, i32 %cond) {
 ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX7:       if:
 ; OPT-GFX7-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr 
addrspace(1)
-; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) 
[[TMP0]], i64 28
+; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], 
align 4
 ; OPT-GFX7-NEXT:    br label [[ENDIF]]
 ; OPT-GFX7:       endif:
@@ -182,8 +182,8 @@ define void 
@test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[IN:%.*]], i64 7
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX8:       if:
@@ -197,12 +197,12 @@ define void 
@test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
 ; OPT-GFX9-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr 
addrspace(1)
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) 
[[TMP0]], i64 28
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], 
align 4
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
 ; OPT-GFX9:       endif:
@@ -212,12 +212,12 @@ define void 
@test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ;
 ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 
999999
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i64 999999
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX10:       if:
 ; OPT-GFX10-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr 
addrspace(1)
-; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) 
[[TMP0]], i64 28
+; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
addrspace(1) [[TMP0]], i64 28
 ; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], 
align 4
 ; OPT-GFX10-NEXT:    br label [[ENDIF]]
 ; OPT-GFX10:       endif:
@@ -303,8 +303,8 @@ define void 
@test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -325,12 +325,12 @@ done:
 define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr 
%in, i32 %cond) {
 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
 ; OPT-NEXT:  entry:
-; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], 
i64 999999
 ; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
 ; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT:       if:
 ; OPT-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
-; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], 
i64 28
+; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) 
[[TMP0]], i64 28
 ; OPT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
 ; OPT-NEXT:    br label [[ENDIF]]
 ; OPT:       endif:
@@ -416,8 +416,8 @@ define void 
@test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i64 999999
-  %in.gep = getelementptr i32, ptr %in, i64 7
+  %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+  %in.gep = getelementptr inbounds i32, ptr %in, i64 7
   %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %endif, label %if
@@ -438,8 +438,8 @@ done:
 define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX7-NEXT:  entry:
-; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 
1024
-; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i32 1024
+; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 4095
 ; OPT-GFX7-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 
0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, 
ptr %in) #1 {
 ;
 ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX8-NEXT:  entry:
-; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 
1024
-; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i32 1024
+; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 4095
 ; OPT-GFX8-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 
0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr 
%out, ptr %in) #1 {
 ;
 ; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX9-NEXT:  entry:
-; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 
1024
+; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i32 1024
 ; OPT-GFX9-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 
0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
 ; OPT-GFX9:       if:
-; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 
4095
+; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 4095
 ; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
 ; OPT-GFX9-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
 ; OPT-GFX9-NEXT:    br label [[ENDIF]]
@@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, 
ptr %in) #1 {
 ;
 ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
 ; OPT-GFX10-NEXT:  entry:
-; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 
1024
-; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 
4095
+; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr 
[[OUT:%.*]], i32 1024
+; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr 
[[IN:%.*]], i64 4095
 ; OPT-GFX10-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 
0) #[[ATTR3:[0-9]+]]
 ; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
 ; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, 
ptr %in) #1 {
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %out.gep = getelementptr i32, ptr %out, i32 1024
-  %in.gep = getelementptr i8, ptr %in, i64 4095
+  %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+  %in.gep = getelementptr inbounds i8, ptr %in, i64 4095
   %tid = cal...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/165426
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to