https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/131317
Previously this test only checked for s_endpgm. Generate full checks, and replace the undef values with concrete constants. From f20fb4dfc8859961a182eac3504e243960e53486 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Fri, 14 Mar 2025 18:06:43 +0700 Subject: [PATCH] AMDGPU: Switch simplifydemandedbits-recursion.ll to generated checks Previously this test only checked for s_endpgm. Generate full checks, and replace the undef values with concrete constants. --- .../AMDGPU/simplifydemandedbits-recursion.ll | 79 +++++++++++++++++-- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll index 55b4d12805926..a5299ea36958d 100644 --- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s ; Check we can compile this bugpoint-reduced test without an @@ -9,17 +10,79 @@ @0 = external unnamed_addr addrspace(3) global [462 x float], align 4 -; Function Attrs: nounwind readnone speculatable declare i32 @llvm.amdgcn.workitem.id.y() #0 - -; Function Attrs: nounwind readnone speculatable declare i32 @llvm.amdgcn.workitem.id.x() #0 - -; Function Attrs: nounwind readnone speculatable declare float @llvm.fmuladd.f32(float, float, float) #0 -; CHECK: s_endpgm define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 { +; CHECK-LABEL: foo: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10 +; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0 +; CHECK-NEXT: s_movk_i32 s0, 0x54 +; 
CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp1_b32 s6, 8 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s6, 16 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2 +; CHECK-NEXT: s_xor_b64 s[4:5], s[4:5], -1 +; CHECK-NEXT: s_bitcmp1_b32 s2, 24 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1 +; CHECK-NEXT: s_bitcmp1_b32 s3, 0 +; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s10, 8 +; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 +; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7] +; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5] +; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11] +; CHECK-NEXT: s_and_b64 s[8:9], exec, s[8:9] +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: .LBB0_1: ; %.loopexit145 +; CHECK-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NEXT: ; Child Loop BB0_3 Depth 2 +; CHECK-NEXT: ; Child Loop BB0_4 Depth 3 +; CHECK-NEXT: ; Child Loop BB0_5 Depth 2 +; CHECK-NEXT: v_mov_b32_e32 v2, v1 +; CHECK-NEXT: s_branch .LBB0_3 +; CHECK-NEXT: .LBB0_2: ; %.loopexit +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x540, v2 +; CHECK-NEXT: s_mov_b64 vcc, s[4:5] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_5 +; CHECK-NEXT: .LBB0_3: ; %bb13 +; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: ; => This Loop Header: Depth=2 +; CHECK-NEXT: ; Child Loop BB0_4 Depth 3 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: v_mov_b32_e32 v3, v2 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 +; CHECK-NEXT: .LBB0_4: ; %bb21 +; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: ; Parent Loop BB0_3 Depth=2 +; CHECK-NEXT: ; => This Inner Loop Header: Depth=3 +; CHECK-NEXT: ds_write_b32 v3, v0 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 32, v3 +; CHECK-NEXT: s_mov_b64 vcc, s[2:3] +; CHECK-NEXT: s_cbranch_vccz 
.LBB0_4 +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_5: ; %bb31 +; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NEXT: s_mov_b64 vcc, s[6:7] +; CHECK-NEXT: s_cbranch_vccz .LBB0_5 +; CHECK-NEXT: ; %bb.6: ; %bb30 +; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[8:9] +; CHECK-NEXT: s_cbranch_vccz .LBB0_1 +; CHECK-NEXT: ; %bb.7: ; %bb11 +; CHECK-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -47,7 +110,7 @@ bb13: ; preds = %.loopexit, %.loopex bb17: ; preds = %bb13 %tmp18 = mul i32 %tmp15, 224 - %tmp19 = add i32 undef, %tmp18 + %tmp19 = add i32 0, %tmp18 br label %bb21 .loopexit: ; preds = %bb21, %bb13 @@ -58,7 +121,7 @@ bb21: ; preds = %bb21, %bb17 %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] %tmp23 = add i32 %tmp22, %tmp16 %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23 - store float undef, ptr addrspace(3) %tmp24, align 4 + store float 0.0, ptr addrspace(3) %tmp24, align 4 %tmp25 = add nuw i32 %tmp22, 8 br i1 %c3, label %bb21, label %.loopexit _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits