[llvm-branch-commits] [llvm] AMDGPU: Replace some test undef uses with poison (PR #131103)

Matt Arsenault via llvm-branch-commits Thu, 13 Mar 2025 01:57:15 -0700

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/131103


>From a4537fb6f98c93456a831db04ea9311f3ea3e533 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Thu, 13 Mar 2025 15:07:57 +0700
Subject: [PATCH] AMDGPU: Replace some test undef uses with poison

---
 .../GlobalISel/call-outgoing-stack-args.ll     |  4 ++--
 .../GlobalISel/divergent-control-flow.ll       |  2 +-
 .../CodeGen/AMDGPU/diverge-interp-mov-lower.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll     |  2 +-
 .../CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll | 18 +++++++++---------
 llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll     |  2 +-
 .../test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll | 10 +++++-----
 llvm/test/CodeGen/AMDGPU/ret_jump.ll           |  4 ++--
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll          |  4 ++--
 llvm/test/CodeGen/AMDGPU/split-smrd.ll         |  2 +-
 llvm/test/CodeGen/AMDGPU/v1024.ll              |  4 ++--
 llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll     |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll                |  4 ++--
 13 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index c99424fe2f7d9..7adaddf2fc8ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -53,7 +53,7 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s2
 ; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; FLATSCR-NEXT:    s_endpgm
-  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> undef, <16 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
   ret void
 }
 
@@ -294,7 +294,7 @@ define void @func_caller_stack() {
 ; FLATSCR-NEXT:    s_mov_b32 s33, s0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> undef, <16 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 989ee80a1f002..9efed32bbe082 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -231,7 +231,7 @@ bb:
   br label %bb1
 
 bb1:
-  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
+  %lsr.iv = phi i32 [ poison, %bb ], [ %lsr.iv.next, %bb4 ]
   %lsr.iv.next = add i32 %lsr.iv, 1
   %cmp0 = icmp slt i32 %lsr.iv.next, 0
   br i1 %cmp0, label %bb4, label %bb9
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
index c923991f5cfcb..e03be90a22d3c 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
@@ -21,7 +21,7 @@ define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_a
   %tmp6 = load <4 x float>, ptr addrspace(4) %tmp5, align 16
   %tmp7 = extractelement <4 x float> %tmp6, i32 3
   %tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %tmp7) #1
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> undef, <2 x half> %tmp8, i1 true, i1 true) #2
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> poison, <2 x half> %tmp8, i1 true, i1 true) #2
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index 235d8dde96658..b86ad8f2e4476 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -29,7 +29,7 @@ entry:
 ; OPT: s_endpgm
 define amdgpu_kernel void @only_undef_dbg_value() #1 {
 bb:
-  call void @llvm.dbg.value(metadata <4 x float> undef, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14
+  call void @llvm.dbg.value(metadata <4 x float> poison, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14
   ret void, !dbg !14
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
index f86891d174468..1c032857f2688 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
@@ -33,7 +33,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x2bf16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> undef, <2 x i16> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> poison, <2 x i16> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -43,7 +43,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_4x4x2bf16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> undef, <2 x i16> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> poison, <2 x i16> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -53,7 +53,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_32x32x4bf16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> undef, <2 x i16> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> poison, <2 x i16> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -63,7 +63,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x8bf16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> undef, <2 x i16> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> poison, <2 x i16> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -73,7 +73,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> undef, <4 x i16> undef, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> poison, <4 x i16> poison, <32 x float> %in.1, i32 0, i32 0, i32 0)
   store <32 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -83,7 +83,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> undef, <4 x i16> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> poison, <4 x i16> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -93,7 +93,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> undef, <4 x i16> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> poison, <4 x i16> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -103,7 +103,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> undef, <4 x i16> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> poison, <4 x i16> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -113,7 +113,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> undef, <4 x i16> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> poison, <4 x i16> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 8484943efb2c7..368ab0ba1a1c9 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -42,7 +42,7 @@ bb:
 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr addrspace(1) %arg) {
 bb:
-  call void asm sideeffect "; use $0", "{a[100:131]}"(<32 x float> undef)
+  call void asm sideeffect "; use $0", "{a[100:131]}"(<32 x float> poison)
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
   store <32 x float> %mai.1, ptr addrspace(1) %arg
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
index 71226c1880e91..0d1ea356663ed 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
@@ -72,7 +72,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_32x32x4f16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> undef, <4 x half> undef, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> poison, <4 x half> poison, <32 x float> %in.1, i32 0, i32 0, i32 0)
   store <32 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -82,7 +82,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x4f16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> undef, <4 x half> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> poison, <4 x half> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -92,7 +92,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_4x4x4f16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> undef, <4 x half> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> poison, <4 x half> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -102,7 +102,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_32x32x8f16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <16 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> undef, <4 x half> undef, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> poison, <4 x half> poison, <16 x float> %in.1, i32 0, i32 0, i32 0)
   store <16 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
@@ -112,7 +112,7 @@ bb:
 define amdgpu_kernel void @test_mfma_f32_16x16x16f16(ptr addrspace(1) %arg) #0 {
 bb:
   %in.1 = load <4 x float>, ptr addrspace(1) %arg
-  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> undef, <4 x half> undef, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> poison, <4 x half> poison, <4 x float> %in.1, i32 0, i32 0, i32 0)
   store <4 x float> %mai.1, ptr addrspace(1) %arg
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index 46828a17444ab..4e9fb1a2d4026 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -50,7 +50,7 @@ unreachable.bb:                                           ; preds = %else
   unreachable
 
 ret.bb:                                          ; preds = %else, %main_body
-  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef
+  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> poison
 }
 
 ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
@@ -103,7 +103,7 @@ unreachable.bb:                                           ; preds = %else
 
 ret.bb:                                          ; preds = %else, %main_body
   store volatile i32 11, ptr addrspace(1) poison
-  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef
+  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> poison
 }
 
 ; Function Attrs: nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index 9325743be702f..5a3038604d80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -371,7 +371,7 @@ bb:
   %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp10 = extractelement <4 x float> %tmp, i32 0
   %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %tmp10)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> poison, i1 true, i1 true) #0
   ret void
 }
 
@@ -386,7 +386,7 @@ bb:
   %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> poison, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
   %tmp10 = extractelement <4 x float> %tmp, i32 0
   %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float poison)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> poison, i1 true, i1 true) #0
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/split-smrd.ll b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
index 712993345b320..a39d50815cec8 100644
--- a/llvm/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
@@ -24,7 +24,7 @@ bb3:                                              ; preds = %bb
   %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp10 = extractelement <4 x float> %tmp9, i32 0
   %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float poison)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> poison, i1 true, i1 true) #0
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
index fc02ffbf7cb1c..a66c4ef9d3da1 100644
--- a/llvm/test/CodeGen/AMDGPU/v1024.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -13,14 +13,14 @@ entry:
   br i1 %c0, label %if.then.i.i, label %if.else.i
 
 if.then.i.i:                                      ; preds = %entry
-  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 undef, i64 128, i1 false)
+  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 poison, i64 128, i1 false)
   br label %if.then.i62.i
 
 if.else.i:                                        ; preds = %entry
   br label %if.then.i62.i
 
 if.then.i62.i:                                    ; preds = %if.else.i, %if.then.i.i
-  call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 undef, ptr addrspace(5) align 16 %alloca, i64 128, i1 false)
+  call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 poison, ptr addrspace(5) align 16 %alloca, i64 128, i1 false)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
index 692491457ae3d..4c1eefdcc22f9 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
@@ -13,7 +13,7 @@ define amdgpu_cs void @xyz () {
 loop:
   %ld = load <8 x float>, ptr addrspace(5) null, align 32
   %in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
+  %wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> poison, <16 x half> poison, <4 x float> %in_shuffle)
   %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
   store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
   br i1 false, label %.exit, label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 02d6ed339efcf..43fea1d5a2ba3 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -209,7 +209,7 @@ define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp
 main_body:
   %c.1 = mul i32 %c, %d
 
-  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> undef, <4 x i32> poison, i32 %c.1, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> poison, <4 x i32> poison, i32 %c.1, i32 0, i32 0, i32 0)
   %c.1.bc = bitcast i32 %c.1 to float
   %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0
@@ -247,7 +247,7 @@ define amdgpu_ps <4 x float> @test4_ptr_buf(<8 x i32> inreg %rsrc, <4 x i32> inr
 main_body:
   %c.1 = mul i32 %c, %d
 
-  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> undef, ptr addrspace(8) poison, i32 %c.1, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> poison, ptr addrspace(8) poison, i32 %c.1, i32 0, i32 0, i32 0)
   %c.1.bc = bitcast i32 %c.1 to float
   %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
   %tex0 = extractelement <4 x float> %tex, i32 0

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Replace some test undef uses with poison (PR #131103)

Reply via email to