https://github.com/mihajlovicana created https://github.com/llvm/llvm-project/pull/131329
SI_CS_CHAIN adds register classes to generic instructions. This causes the legalize combiner to break. This patch fixes the issue by adding COPY instructions. >From 3e36fbad5d782690ef845f754f3203d9d79b0602 Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic <ana.mihajlo...@amd.com> Date: Mon, 10 Mar 2025 14:19:13 +0100 Subject: [PATCH 1/3] AMDGPU/GlobalISel: add tests for CS_CHAIN --- .../irtranslator-amdgcn-cs-chain.ll | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index 4b0ff1b2eb470..e8ee5d8625ccb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -134,4 +134,187 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p unreachable } +define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %0) #10 { + ; GFX11-LABEL: name: retry_vgpr_alloc.v20i32 + ; GFX11: bb.1 (%ir-block.1): + ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $sgpr14 + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $sgpr15 + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $sgpr17 + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $sgpr18 + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $sgpr19 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<20 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX11-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 + ; GFX11-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) + ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) + ; GFX11-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32) + ; GFX11-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[EVEC2]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64) + ; GFX11-NEXT: 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2 + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; GFX11-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV4]](s32) + ; GFX11-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV5]](s32) + ; GFX11-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV6]](s32) + ; GFX11-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) + ; GFX11-NEXT: 
[[INTRINSIC_CONVERGENT7:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV7]](s32) + ; GFX11-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV8]](s32) + ; GFX11-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV9]](s32) + ; GFX11-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV10]](s32) + ; GFX11-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV11]](s32) + ; GFX11-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV12]](s32) + ; GFX11-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV13]](s32) + ; GFX11-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV14]](s32) + ; GFX11-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV15]](s32) + ; GFX11-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT16:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV16]](s32) + ; GFX11-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT16]](s32) + ; GFX11-NEXT: 
[[INTRINSIC_CONVERGENT17:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV17]](s32) + ; GFX11-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT17]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT18:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV18]](s32) + ; GFX11-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32) + ; GFX11-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32) + ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19 + ; + ; GFX10-LABEL: name: retry_vgpr_alloc.v20i32 + ; GFX10: bb.1 (%ir-block.1): + ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX10-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $sgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $sgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $sgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $sgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $sgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $sgpr19 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<20 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 + ; GFX10-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) + ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) + ; GFX10-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[EVEC2]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]] + ; GFX10-NEXT: 
[[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64) + ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; GFX10-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV4]](s32) + ; GFX10-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV5]](s32) + ; GFX10-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV6]](s32) + ; 
GFX10-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV7]](s32) + ; GFX10-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV8]](s32) + ; GFX10-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV9]](s32) + ; GFX10-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV10]](s32) + ; GFX10-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV11]](s32) + ; GFX10-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV12]](s32) + ; GFX10-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV13]](s32) + ; GFX10-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV14]](s32) + ; GFX10-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV15]](s32) + ; GFX10-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT16:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV16]](s32) + ; GFX10-NEXT: $sgpr16 = 
COPY [[INTRINSIC_CONVERGENT16]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT17:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV17]](s32) + ; GFX10-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT17]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT18:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV18]](s32) + ; GFX10-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32) + ; GFX10-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY20]](<4 x s32>) + ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + %.i19 = extractelement <20 x i32> %0, i64 19 + %.i18 = extractelement <20 x i32> %0, i64 18 + %.i17 = extractelement <20 x i32> %0, i64 17 + %2 = call i64 @llvm.amdgcn.s.getpc() + %3 = and i64 %2, -4294967296 + %4 = zext i32 %.i17 to i64 + %5 = or disjoint i64 %3, %4 + %6 = inttoptr i64 %5 to ptr + call void @llvm.amdgcn.s.sleep(i32 2) + call void (ptr, i32, <20 x i32>, {}, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_s(ptr inreg %6, i32 inreg %.i18, <20 x i32> inreg %0, {} poison, i32 1, i32 %.i19, i32 -1, ptr nonnull @retry_vgpr_alloc.v20i32) + unreachable +} + >From 636bae850d6d939c422690b8b5e9be4ba28f2d8a Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic <ana.mihajlo...@amd.com> Date: Mon, 10 Mar 2025 16:29:18 +0100 Subject: [PATCH 2/3] isel fix --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 6 +- .../irtranslator-amdgcn-cs-chain.ll | 26 +++--- .../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll | 76 +++++++++++++++-- ...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 84 ++++++++++--------- 4 files changed, 134 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index a15f193549936..30c09227a634d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -1278,7 +1278,11 @@ bool AMDGPUCallLowering::lowerTailCall( if (auto CI = dyn_cast<ConstantInt>(Arg.OrigValue)) { MIB.addImm(CI->getSExtValue()); } else { - MIB.addReg(Arg.Regs[0]); + Register Reg = Arg.Regs[0]; + if (!MRI.getVRegDef(Reg)->isCopy()) + Reg = MIRBuilder.buildCopy(MRI.getType(Reg), Reg).getReg(0); + + MIB.addReg(Reg); unsigned Idx = MIB->getNumOperands() - 1; MIB->getOperand(Idx).setReg(constrainOperandRegClass( MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(), diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index e8ee5d8625ccb..a5a0defb833cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -166,9 +166,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX11-NEXT: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX11-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 - ; GFX11-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) - ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) + ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 + ; GFX11-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) + ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) ; GFX11-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32) ; GFX11-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]] @@ -176,6 +176,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64) ; GFX11-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2 + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC1]](s32) + ; GFX11-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC]](s32) + ; GFX11-NEXT: [[COPY22:%[0-9]+]]:ccr_sgpr_64(p0) = COPY [[GV]](p0) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>) ; GFX11-NEXT: 
[[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) @@ -217,7 +220,7 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX11-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32) ; GFX11-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32) ; GFX11-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32) - ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19 + ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[COPY20]](s32), [[COPY21]](s32), -1, [[COPY22]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19 ; ; GFX10-LABEL: name: retry_vgpr_alloc.v20i32 ; GFX10: bb.1 (%ir-block.1): @@ -250,9 +253,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 - ; GFX10-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = 
G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) - ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) + ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32 + ; GFX10-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32) + ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32) ; GFX10-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32) ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc) ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]] @@ -260,6 +263,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64) ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC1]](s32) + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC]](s32) + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:ccr_sgpr_64(p0) = COPY [[GV]](p0) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>) ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) @@ -301,9 +307,9 @@ 
define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg % ; GFX10-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32) ; GFX10-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32) ; GFX10-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY20]](<4 x s32>) - ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY23]](<4 x s32>) + ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[COPY20]](s32), [[COPY21]](s32), -1, [[COPY22]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51 %.i19 = extractelement <20 x i32> %0, i64 19 %.i18 = extractelement <20 x i32> %0, i64 18 %.i17 = extractelement <20 x i32> %0, i64 17 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll index 
77c9b9813571a..f766baf73f9ab 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll @@ -18,16 +18,16 @@ define amdgpu_cs_chain void @dynamic_vgprs(i32 inreg %exec, <3 x i32> inreg %sgp ; GISEL-GFX12-NEXT: s_mov_b32 s0, s1 ; GISEL-GFX12-NEXT: s_mov_b32 s1, s2 ; GISEL-GFX12-NEXT: s_mov_b32 s2, s3 -; GISEL-GFX12-NEXT: s_mov_b32 s6, callee@abs32@lo -; GISEL-GFX12-NEXT: s_mov_b32 s7, callee@abs32@hi -; GISEL-GFX12-NEXT: s_mov_b32 s8, retry_vgpr_alloc@abs32@lo -; GISEL-GFX12-NEXT: s_mov_b32 s9, retry_vgpr_alloc@abs32@hi +; GISEL-GFX12-NEXT: s_mov_b32 s6, retry_vgpr_alloc@abs32@lo +; GISEL-GFX12-NEXT: s_mov_b32 s7, retry_vgpr_alloc@abs32@hi +; GISEL-GFX12-NEXT: s_mov_b32 s8, callee@abs32@lo +; GISEL-GFX12-NEXT: s_mov_b32 s9, callee@abs32@hi ; GISEL-GFX12-NEXT: s_alloc_vgpr s4 ; GISEL-GFX12-NEXT: s_wait_alu 0xfffe -; GISEL-GFX12-NEXT: s_cselect_b64 s[6:7], s[6:7], s[8:9] +; GISEL-GFX12-NEXT: s_cselect_b64 s[8:9], s[8:9], s[6:7] ; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s5, -1 ; GISEL-GFX12-NEXT: s_wait_alu 0xfffe -; GISEL-GFX12-NEXT: s_setpc_b64 s[6:7] +; GISEL-GFX12-NEXT: s_setpc_b64 s[8:9] ; ; DAGISEL-GFX12-LABEL: dynamic_vgprs: ; DAGISEL-GFX12: ; %bb.0: @@ -95,3 +95,67 @@ define amdgpu_cs_chain void @constants(<3 x i32> inreg %sgpr, { i32, ptr addrspa } declare amdgpu_cs_chain_preserve void @retry_vgpr_alloc(<3 x i32> inreg %sgpr) + +define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %0) #10 { +; GISEL-GFX12-LABEL: retry_vgpr_alloc.v20i32: +; GISEL-GFX12: ; %bb.0: +; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL-GFX12-NEXT: s_wait_expcnt 0x0 +; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0 +; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 +; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0 +; GISEL-GFX12-NEXT: s_getpc_b64 s[20:21] +; GISEL-GFX12-NEXT: s_mov_b32 s22, 0 +; GISEL-GFX12-NEXT: s_wait_alu 0xfffe +; GISEL-GFX12-NEXT: s_sext_i32_i16 s21, s21 +; GISEL-GFX12-NEXT: 
s_mov_b32 s23, -1 +; GISEL-GFX12-NEXT: s_mov_b32 s24, s17 +; GISEL-GFX12-NEXT: s_wait_alu 0xfffe +; GISEL-GFX12-NEXT: s_and_b64 s[20:21], s[20:21], s[22:23] +; GISEL-GFX12-NEXT: s_mov_b32 s25, 0 +; GISEL-GFX12-NEXT: s_mov_b32 s22, retry_vgpr_alloc.v20i32@abs32@lo +; GISEL-GFX12-NEXT: s_wait_alu 0xfffe +; GISEL-GFX12-NEXT: s_or_b64 s[20:21], s[20:21], s[24:25] +; GISEL-GFX12-NEXT: s_mov_b32 s23, retry_vgpr_alloc.v20i32@abs32@hi +; GISEL-GFX12-NEXT: s_sleep 2 +; GISEL-GFX12-NEXT: s_alloc_vgpr s19 +; GISEL-GFX12-NEXT: s_wait_alu 0xfffe +; GISEL-GFX12-NEXT: s_cselect_b64 s[20:21], s[20:21], s[22:23] +; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s18, -1 +; GISEL-GFX12-NEXT: s_wait_alu 0xfffe +; GISEL-GFX12-NEXT: s_setpc_b64 s[20:21] +; +; DAGISEL-GFX12-LABEL: retry_vgpr_alloc.v20i32: +; DAGISEL-GFX12: ; %bb.0: +; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0 +; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0 +; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0 +; DAGISEL-GFX12-NEXT: s_getpc_b64 s[24:25] +; DAGISEL-GFX12-NEXT: s_mov_b32 s20, s17 +; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe +; DAGISEL-GFX12-NEXT: s_sext_i32_i16 s25, s25 +; DAGISEL-GFX12-NEXT: s_mov_b32 s23, retry_vgpr_alloc.v20i32@abs32@hi +; DAGISEL-GFX12-NEXT: s_mov_b32 s22, retry_vgpr_alloc.v20i32@abs32@lo +; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe +; DAGISEL-GFX12-NEXT: s_mov_b32 s21, s25 +; DAGISEL-GFX12-NEXT: s_sleep 2 +; DAGISEL-GFX12-NEXT: s_alloc_vgpr s19 +; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe +; DAGISEL-GFX12-NEXT: s_cselect_b64 s[20:21], s[20:21], s[22:23] +; DAGISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s18, -1 +; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe +; DAGISEL-GFX12-NEXT: s_setpc_b64 s[20:21] + %.i19 = extractelement <20 x i32> %0, i64 19 + %.i18 = extractelement <20 x i32> %0, i64 18 + %.i17 = extractelement <20 x i32> %0, i64 17 + %2 = call i64 @llvm.amdgcn.s.getpc() + %3 = and i64 %2, -4294967296 + %4 = zext i32 %.i17 to i64 + %5 = or 
disjoint i64 %3, %4 + %6 = inttoptr i64 %5 to ptr + call void @llvm.amdgcn.s.sleep(i32 2) + call void (ptr, i32, <20 x i32>, {}, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_s(ptr inreg %6, i32 inreg %.i18, <20 x i32> inreg %0, {} poison, i32 1, i32 %.i19, i32 -1, ptr nonnull @retry_vgpr_alloc.v20i32) + unreachable +} diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll index 4e040748a34d8..e160072c1cebd 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll @@ -19,28 +19,28 @@ define amdgpu_cs_chain void @direct_callee_direct_fallback(<3 x i32> inreg %sgpr ; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc + ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc + ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; 
GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee - ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee - ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc - ; GISEL-GFX12-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc + ; GISEL-GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX12-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY10]], 0, 0, 15, 64, -1, [[COPY11]], 
amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY11]], 0, 0, 15, 64, -1, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; DAGISEL-GFX12-LABEL: name: direct_callee_direct_fallback ; DAGISEL-GFX12: bb.0 (%ir-block.0): @@ -94,24 +94,24 @@ define amdgpu_cs_chain void @indirect_callee_direct_fallback(i32 inreg %exec, pt ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc + ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc + ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr1 = 
COPY [[V_READFIRSTLANE_B32_1]] - ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY8]] ; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY9]] - ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc - ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc - ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY14]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY11]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; DAGISEL-GFX12-LABEL: name: indirect_callee_direct_fallback ; DAGISEL-GFX12: bb.0 (%ir-block.0): @@ -165,7 +165,7 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt ; GISEL-GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GISEL-GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GISEL-GFX12-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GISEL-GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 @@ -174,14 +174,15 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec ; GISEL-GFX12-NEXT: 
$sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY7]] @@ -190,8 +191,8 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY14]], 0, 0, [[COPY]], [[COPY10]], -1, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY15]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY11]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; DAGISEL-GFX12-LABEL: name: direct_callee_indirect_fallback ; DAGISEL-GFX12: bb.0 (%ir-block.0): @@ -243,7 +244,7 @@ define amdgpu_cs_chain void @indirect_callee_indirect_fallback(i32 inreg %exec, ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; GISEL-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = 
COPY $sgpr7 ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr8 @@ -252,20 +253,21 @@ define amdgpu_cs_chain void @indirect_callee_indirect_fallback(i32 inreg %exec, ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr11 ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GISEL-GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GISEL-GFX12-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec ; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY9]] ; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY10]] ; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY11]] ; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY12]] - ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY13]], [[COPY1]], 
[[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY13]], [[COPY1]], [[COPY14]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; DAGISEL-GFX12-LABEL: name: indirect_callee_indirect_fallback ; DAGISEL-GFX12: bb.0 (%ir-block.0): >From 07e2090d07d66fbaab138c405a8015197ec991de Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic <ana.mihajlo...@amd.com> Date: Fri, 14 Mar 2025 13:48:37 +0100 Subject: [PATCH 3/3] update test --- .../AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll index f766baf73f9ab..e9ad1f39cf09b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll @@ -62,16 +62,16 @@ define amdgpu_cs_chain void @constants(<3 x i32> inreg %sgpr, { i32, ptr addrspa ; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0 ; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0 ; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0 -; GISEL-GFX12-NEXT: s_mov_b32 s4, callee@abs32@lo -; GISEL-GFX12-NEXT: s_mov_b32 s5, callee@abs32@hi -; GISEL-GFX12-NEXT: s_mov_b32 s6, retry_vgpr_alloc@abs32@lo -; GISEL-GFX12-NEXT: s_mov_b32 s7, retry_vgpr_alloc@abs32@hi +; GISEL-GFX12-NEXT: s_mov_b32 s4, retry_vgpr_alloc@abs32@lo +; GISEL-GFX12-NEXT: s_mov_b32 s5, retry_vgpr_alloc@abs32@hi +; GISEL-GFX12-NEXT: s_mov_b32 s6, callee@abs32@lo +; GISEL-GFX12-NEXT: s_mov_b32 s7, callee@abs32@hi ; GISEL-GFX12-NEXT: s_alloc_vgpr 64 ; GISEL-GFX12-NEXT: s_wait_alu 0xfffe -; GISEL-GFX12-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GISEL-GFX12-NEXT: s_cselect_b64 s[6:7], 
s[6:7], s[4:5] ; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, 7, -1 ; GISEL-GFX12-NEXT: s_wait_alu 0xfffe -; GISEL-GFX12-NEXT: s_setpc_b64 s[4:5] +; GISEL-GFX12-NEXT: s_setpc_b64 s[6:7] ; ; DAGISEL-GFX12-LABEL: constants: ; DAGISEL-GFX12: ; %bb.0: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits