https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/131101
>From 054d80a8ef3d5d456b2ff692d9bf7215c31933a8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Thu, 13 Mar 2025 14:55:45 +0700 Subject: [PATCH] AMDGPU: Replace test uses of ptr addrspace(5) undef with poison --- .../AMDGPU/GlobalISel/divergent-control-flow.ll | 2 +- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 4 ++-- llvm/test/CodeGen/AMDGPU/dag-divergence.ll | 2 +- llvm/test/CodeGen/AMDGPU/debug-value.ll | 2 +- llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll | 2 +- llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir | 2 +- llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll | 2 +- ...emory-legalizer-multiple-mem-operands-atomics.mir | 6 +++--- llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll | 2 +- llvm/test/CodeGen/AMDGPU/operand-folding.ll | 2 +- .../test/CodeGen/AMDGPU/private-access-no-objects.ll | 4 ++-- .../CodeGen/AMDGPU/promote-alloca-to-lds-select.ll | 6 +++--- llvm/test/CodeGen/AMDGPU/sad.ll | 12 ++++++------ llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll | 8 ++++---- .../stack-pointer-offset-relative-frameindex.ll | 2 +- 15 files changed, 29 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index 3ad5845467cd0..989ee80a1f002 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -183,7 +183,7 @@ bb8: br i1 %tmp10, label %bb11, label %bb12 bb11: - store float 4.0, ptr addrspace(5) undef, align 4 + store float 4.0, ptr addrspace(5) poison, align 4 br label %bb12 bb12: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index c8a4f2d2e6b7b..a60a16c9be47e 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1360,7 +1360,7 @@ bb2: ; preds = %bb1 br i1 %tmp3, label %bb4, label %bb10 bb4: ; preds = %bb2 - %tmp6 = load float, ptr addrspace(5) undef + %tmp6 = load float, ptr addrspace(5) poison %tmp7 = fcmp olt float %tmp6, 0.0 br i1 %tmp7, label %bb8, label %Flow @@ -1380,7 +1380,7 @@ Flow1: ; preds = %bb10 br label %bb1 bb12: ; preds = %bb10 - store volatile <4 x float> %tmp11, ptr addrspace(5) undef, align 16 + store volatile <4 x float> %tmp11, ptr addrspace(5) poison, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll index 8b27a9bde8ea6..9f83393d88061 100644 --- a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll +++ b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll @@ -6,7 +6,7 @@ ; GCN: flat_load_dword ; GCN-NOT: s_load_dword s define amdgpu_kernel void @private_load_maybe_divergent(ptr addrspace(4) %k, ptr %flat) { - %load = load volatile i32, ptr addrspace(5) undef, align 4 + %load = load volatile i32, ptr addrspace(5) poison, align 4 %gep = getelementptr inbounds i32, ptr addrspace(4) %k, i32 %load %maybe.not.uniform.load = load i32, ptr addrspace(4) %gep, align 4 store i32 %maybe.not.uniform.load, ptr addrspace(1) poison diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll index 167c0ce7ceefa..f13bd665cc7f0 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-value.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll @@ -35,7 +35,7 @@ bb25: ; preds = %bb bb28: ; preds = %bb25, %bb21 %tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ] - store <4 x float> %tmp29, ptr addrspace(5) undef, align 16 + store <4 x float> %tmp29, ptr addrspace(5) poison, align 16 %tmp30 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 2 %tmp31 = load i32, ptr addrspace(1) %tmp30, align 4 %tmp32 = sext i32 %tmp31 to i64 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index ea33925117aea..25fe57c16c661 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -255,7 +255,7 @@ define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { bb: %tmp = alloca <4 x float>, align 16, addrspace(5) %tmp2 = insertelement <4 x float> poison, float %arg, i32 0 - store <4 x float> %tmp2, ptr addrspace(5) undef + store <4 x float> %tmp2, ptr addrspace(5) poison %tmp3 = icmp eq i32 %arg1, 0 br i1 %tmp3, label %bb4, label %bb5 diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir index f10b5378a7462..cc150b40d0c7e 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir @@ -77,7 +77,7 @@ name: scratch_load_lds_dword_ds_read body: | bb.0: $m0 = S_MOV_B32 0 - SCRATCH_LOAD_LDS_DWORD $vgpr0, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(5) undef` + 4), (store (s32) into `ptr addrspace(3) undef` + 4) + SCRATCH_LOAD_LDS_DWORD $vgpr0, 4, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(5) poison` + 4), (store (s32) into `ptr addrspace(3) undef` + 4) $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll index 3bd8700e33661..ef1cbd78d7cd7 100644 --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -104,7 +104,7 @@ if.end5.i362: ; preds = %if.then3.i356, %if. %conv612.i359 = sext i8 %5 to i32 %sub13.i360 = add nsw i32 %conv612.i359, -48 %cmp714.i361 = icmp ugt i32 %sub13.i360, 9 - store i8 0, ptr addrspace(5) undef, align 16 + store i8 0, ptr addrspace(5) poison, align 16 %6 = load i8, ptr addrspace(1) getelementptr inbounds ([4096 x i8], ptr addrspace(1) @_RSENC_gDcd_______________________________, i64 0, i64 1153), align 1 %arrayidx232250.1 = getelementptr inbounds [128 x i8], ptr addrspace(5) %pD10, i32 0, i32 1 store i8 %6, ptr addrspace(5) %arrayidx232250.1, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir index 6705d1bdb4bb0..ceb2033965e8f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -23,13 +23,13 @@ body: | $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) poison`) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) undef`) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) poison`) S_CBRANCH_SCC0 %bb.1, implicit killed $scc bb.2: @@ -55,7 +55,7 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered (s32) from `ptr addrspace(1) undef`), (load syncscope("workgroup-one-as") seq_cst (s32) from `ptr addrspace(5) undef`) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered (s32) from `ptr addrspace(1) undef`), (load syncscope("workgroup-one-as") seq_cst (s32) from `ptr addrspace(5) poison`) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll index a4cb43e14c2e2..a487650fe8ef2 100644 --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll @@ -13,7 +13,7 @@ ; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @in_worklist_once() #0 { bb: - %tmp = load i64, ptr addrspace(5) undef + %tmp = load i64, ptr addrspace(5) poison br label %bb1 bb1: ; preds = %bb1, %bb diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll index 8bb10d5c7d20f..93631ff12e277 100644 --- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll +++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll @@ -130,7 +130,7 @@ define amdgpu_kernel void @no_fold_tied_subregister() #1 { ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define void @no_extra_fold_on_same_opnd() #1 { entry: - %s0 = load i32, ptr addrspace(5) undef, align 4 + %s0 = load i32, ptr addrspace(5) poison, align 4 %s0.i64= zext i32 %s0 to i64 br label %for.body.i.i diff --git a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll index f1e5e68927be7..e687ad940862c 100644 --- a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll +++ b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll @@ -18,7 +18,7 @@ ; OPTNONE-NOT: s_mov_b32 ; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} define amdgpu_kernel void @store_to_undef() #0 { - store volatile i32 0, ptr addrspace(5) undef + store volatile i32 0, ptr addrspace(5) poison ret void } @@ -36,7 +36,7 @@ define amdgpu_kernel void @store_to_inttoptr() #0 { ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3] ; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offen glc{{$}} define amdgpu_kernel void @load_from_undef() #0 { - %ld = load volatile i32, ptr addrspace(5) undef + %ld = load volatile i32, ptr addrspace(5) poison ret void } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll index 4a15dad069c63..4c2dd7956a032 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll @@ -4,10 +4,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 ; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand( ; CHECK: %alloca = alloca i32 -; CHECK: select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca +; CHECK: select i1 undef, ptr addrspace(5) poison, ptr addrspace(5) %alloca define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 { %alloca = alloca i32, align 4, addrspace(5) - %select = select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca + %select = select i1 undef, ptr addrspace(5) poison, ptr addrspace(5) %alloca store i32 0, ptr addrspace(5) %select, align 4 ret void } @@ -87,7 +87,7 @@ entry: bb1: %ptr2 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %c - %select0 = select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %ptr2 + %select0 = select i1 undef, ptr addrspace(5) poison, ptr addrspace(5) %ptr2 store i32 0, ptr addrspace(5) %ptr1 br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index 27ab9ace811eb..5474338514522 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -105,7 +105,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(ptr addrspace(1) %out, i %t1 = select i1 %icmp1, i32 %a, i32 %b %ret0 = sub i32 %t0, %t1 - store volatile i32 %ret0, ptr addrspace(5) undef + store volatile i32 %ret0, ptr addrspace(5) poison %ret = add i32 %ret0, %c store i32 %ret, ptr addrspace(1) %out @@ -139,7 +139,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(ptr addrspace(1) %out, i %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c - store volatile i32 %ret, ptr addrspace(5) undef + store volatile i32 %ret, ptr addrspace(5) poison store i32 %ret, ptr addrspace(1) %out ret void } @@ -167,7 +167,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(ptr addrspace(1) %out, i ; GCN-NEXT: s_endpgm %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b - store volatile i32 %t0, ptr addrspace(5) undef + store volatile i32 %t0, ptr addrspace(5) poison %icmp1 = icmp ule i32 %a, %b %t1 = select i1 %icmp1, i32 %a, i32 %b @@ -206,7 +206,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(ptr addrspace(1) %out, i %icmp1 = icmp ule i32 %a, %b %t1 = select i1 %icmp1, i32 %a, i32 %b - store volatile i32 %t1, ptr addrspace(5) undef + store volatile i32 %t1, ptr addrspace(5) poison %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c @@ -238,7 +238,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(ptr addrspace(1) %out, i ; GCN-NEXT: s_endpgm %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %b - store volatile i32 %sub0, ptr addrspace(5) undef + store volatile i32 %sub0, ptr addrspace(5) poison %sub1 = sub i32 %b, %a %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1 @@ -274,7 +274,7 @@ define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(ptr addrspace(1) %out %sub0 = sub i32 %a, %b %sub1 = sub i32 %b, %a %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1 - store volatile i32 %ret0, ptr addrspace(5) undef + store volatile i32 %ret0, ptr addrspace(5) poison %ret = add i32 %ret0, %c diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll index 47e4406762872..b37a66d3be1ee 100644 --- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll +++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector_v2x16.ll @@ -5,9 +5,9 @@ ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 42 ; GCN: buffer_store_short [[V]], define void @scalar_to_vector_i16() { - %tmp = load <2 x i16>, ptr addrspace(5) undef + %tmp = load <2 x i16>, ptr addrspace(5) poison %tmp1 = insertelement <2 x i16> %tmp, i16 42, i64 0 - store <2 x i16> %tmp1, ptr addrspace(5) undef + store <2 x i16> %tmp1, ptr addrspace(5) poison ret void } @@ -15,8 +15,8 @@ define void @scalar_to_vector_i16() { ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3c00 ; GCN: buffer_store_short [[V]], define void @scalar_to_vector_f16() { - %tmp = load <2 x half>, ptr addrspace(5) undef + %tmp = load <2 x half>, ptr addrspace(5) poison %tmp1 = insertelement <2 x half> %tmp, half 1.0, i64 0 - store <2 x half> %tmp1, ptr addrspace(5) undef + store <2 x half> %tmp1, ptr addrspace(5) poison ret void } diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index d4d3b37a0ed1e..dcf0d3d1829cd 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -148,7 +148,7 @@ if.then4.i: ; preds = %entry %add1.i.i = add i32 %add.i.i, 0 %mul.i.i.i.i = mul i32 %add1.i.i, 1103515245 %add.i.i.i.i = add i32 %mul.i.i.i.i, 12345 - store i32 %add.i.i.i.i, ptr addrspace(5) undef, align 16 + store i32 %add.i.i.i.i, ptr addrspace(5) poison, align 16 br label %shader_eval_surface.exit shader_eval_surface.exit: ; preds = %entry _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits