https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/131308
It's better to widen it to avoid it being lowered into a G_ASHR + G_SHL pair. With this change, we just extend to i32, then trunc the result.

From 815595b1ca20b613b5b4b08cafedda93e397cf92 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Wed, 5 Mar 2025 13:41:04 +0100
Subject: [PATCH] [AMDGPU][Legalizer] Widen i16 G_SEXT_INREG

It's better to widen it to avoid it being lowered into a G_ASHR + G_SHL
pair. With this change, we just extend to i32, then trunc the result.

---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   3 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |   7 +-
 .../AMDGPU/GlobalISel/legalize-abs.mir        |   8 +-
 .../AMDGPU/GlobalISel/legalize-ashr.mir       |  20 +--
 .../AMDGPU/GlobalISel/legalize-sext-inreg.mir | 155 +++++++-----------
 .../AMDGPU/GlobalISel/legalize-sext.mir       | 101 ++++++------
 .../AMDGPU/GlobalISel/legalize-smax.mir       |  33 +++-
 .../AMDGPU/GlobalISel/legalize-smin.mir       |  33 +++-
 .../AMDGPU/GlobalISel/legalize-smulh.mir      | 132 +++++++--------
 .../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll     |  45 ++---
 .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll   | 130 ++++++---------
 11 files changed, 299 insertions(+), 368 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b3a8183beeacf..6e611ebb4b625 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2009,7 +2009,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   // S64 is only legal on SALU, and needs to be broken into 32-bit elements in
   // RegBankSelect.
   auto &SextInReg = getActionDefinitionsBuilder(G_SEXT_INREG)
-                        .legalFor({{S32}, {S64}});
+                        .legalFor({{S32}, {S64}})
+                        .widenScalarIf(typeIs(0, S16), widenScalarOrEltToNextPow2(0, 32));
 
   if (ST.hasVOP3PInsts()) {
     SextInReg.lowerFor({{V2S16}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 493e8cef63890..f81d7f1c300b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -17,8 +17,7 @@ define i8 @v_ashr_i8(i8 %value, i8 %amount) {
 ; GFX8-LABEL: v_ashr_i8:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1
+; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_ashr_i8:
@@ -49,8 +48,8 @@ define i8 @v_ashr_i8_7(i8 %value) {
 ; GFX8-LABEL: v_ashr_i8_7:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 7
+; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_ashr_i8_7:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
index a9fe80eb47e76..2b911b2dce697 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
@@ -144,11 +144,9 @@ body: |
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ;
VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[ASHR]] + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index f4aaab745e03b..53905a2f49dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -319,12 +319,10 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; ; GFX9PLUS-LABEL: name: test_ashr_i8_i8 @@ -374,12 +372,10 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; ; GFX9PLUS-LABEL: name: test_ashr_s7_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 40c48e10f933f..1ab84eb08abf6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -426,11 +426,9 @@ body: | ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX8-NEXT: 
S_ENDPGM 0, implicit [[TRUNC]](s16) ; ; GFX6-LABEL: name: test_sext_inreg_s16_1 ; GFX6: liveins: $vgpr0 @@ -464,11 +462,9 @@ body: | ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 15 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX8-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) ; ; GFX6-LABEL: name: test_sext_inreg_s16_15 ; GFX6: liveins: $vgpr0 @@ -821,19 +817,15 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; @@ -907,38 +899,31 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: 
[[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL5]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) @@ -1101,32 +1086,24 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; 
GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1188,45 +1165,33 @@ body: | ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; 
GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C1]](s16) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C1]](s16) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C1]](s16) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C1]](s16) - ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) - ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) - ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1 + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) - ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR4]](s16) - ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR5]](s16) - ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir index 847ffc8aadc07..a8132862c1643 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -240,11 +240,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(s16) = G_SEXT %1 @@ -653,66 +651,65 @@ body: | ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 - ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[C3]](s16) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C5]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C3]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] + ; 
CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C5]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C5]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL7]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL8]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C3]](s16) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C3]](s16) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C5]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C5]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C5]](s16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]] + ; 
CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C5]](s16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL11]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C1]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL12]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C1]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s88) = G_TRUNC [[MV2]](s704) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 34daf8e8a358d..52b002f75f74d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; ; VI-LABEL: name: test_smax_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; ; GFX9-LABEL: name: test_smax_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -52,6 +54,7 @@ body: | ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; VI-LABEL: name: test_smax_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -60,6 +63,7 @@ body: | ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_smax_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -89,6 +93,7 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; ; VI-LABEL: name: test_smax_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -99,6 +104,7 @@ body: | ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smax_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -133,21 +139,20 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: 
$vgpr0 = COPY [[SMAX]](s32) + ; ; VI-LABEL: name: test_smax_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smax_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -184,6 +189,7 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; ; VI-LABEL: name: test_smax_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -193,6 +199,7 @@ body: | ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) + ; ; GFX9-LABEL: name: test_smax_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -228,6 +235,7 @@ body: | ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_smax_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -239,6 +247,7 @@ body: | ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smax_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -274,6 +283,7 @@ body: | ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_smax_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -286,6 +296,7 @@ body: | ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_smax_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -333,6 +344,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: 
test_smax_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -355,6 +367,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_smax_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -399,6 +412,7 @@ body: | ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_smax_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -427,6 +441,7 @@ body: | ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -500,6 +515,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_smax_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -540,6 +556,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 90bb0122c3889..a59b6088122ba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -18,6 +18,7 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; VI-LABEL: name: test_smin_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; GFX9-LABEL: name: test_smin_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -52,6 +54,7 @@ body: | ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; VI-LABEL: name: test_smin_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -60,6 +63,7 @@ body: | ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_smin_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -89,6 +93,7 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; VI-LABEL: 
name: test_smin_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -99,6 +104,7 @@ body: | ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smin_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -133,21 +139,20 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; VI-LABEL: name: test_smin_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_smin_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -184,6 +189,7 @@ body: | ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; VI-LABEL: name: test_smin_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -193,6 +199,7 @@ body: | ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) + ; ; GFX9-LABEL: name: test_smin_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -228,6 +235,7 @@ body: | ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_smin_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -239,6 +247,7 @@ body: | ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_smin_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -274,6 +283,7 @@ body: | ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_smin_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -286,6 +296,7 @@ body: | ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN 
[[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_smin_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -333,6 +344,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_smin_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -355,6 +367,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_smin_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -399,6 +412,7 @@ body: | ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_smin_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -427,6 +441,7 @@ body: | ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -500,6 +515,7 @@ body: | ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_smin_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -540,6 +556,7 @@ body: | ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir index 51fffb7551a12..cd69104851560 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir @@ -115,18 +115,16 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], 
[[C]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8 - ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8 + ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32) ; ; GFX9-LABEL: name: test_smulh_s8 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -227,28 +225,24 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C]](s16) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C]](s16) - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[ASHR3]], [[ASHR4]] - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16) + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32) + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8 + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32) + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC2]], [[TRUNC3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR5]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; 
GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; @@ -322,54 +316,46 @@ body: | ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C3]](s16) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]] - ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C3]](s16) - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[ASHR3]], [[ASHR4]] - ; GFX8-NEXT: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR6:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR7:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C3]](s16) - ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[ASHR6]], [[ASHR7]] - ; GFX8-NEXT: [[ASHR8:%[0-9]+]]:_(s16) = G_ASHR [[MUL2]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR9:%[0-9]+]]:_(s16) = G_ASHR [[SHL6]], [[C3]](s16) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC7]], [[C3]](s16) - ; GFX8-NEXT: [[ASHR10:%[0-9]+]]:_(s16) = G_ASHR [[SHL7]], [[C3]](s16) - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[ASHR9]], [[ASHR10]] - ; GFX8-NEXT: [[ASHR11:%[0-9]+]]:_(s16) = G_ASHR [[MUL3]], [[C3]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C3]](s16) + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32) + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 8 + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32) + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC2]], [[TRUNC3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C3]](s16) + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 8 + ; 
GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG4]](s32)
+ ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 8
+ ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG5]](s32)
+ ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[TRUNC4]], [[TRUNC5]]
+ ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL2]], [[C3]](s16)
+ ; GFX8-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 8
+ ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG6]](s32)
+ ; GFX8-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 8
+ ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG7]](s32)
+ ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[TRUNC6]], [[TRUNC7]]
+ ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[MUL3]], [[C3]](s16)
+ ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
  ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
  ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
- ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR5]](s16)
+ ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
  ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]]
- ; GFX8-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]]
- ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR8]](s16)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16)
  ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C4]]
- ; GFX8-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
- ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL9]]
- ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR11]](s16)
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR3]](s16)
  ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C4]]
- ; GFX8-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL10]]
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
  ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
  ;
  ; GFX9-LABEL: name: test_smulh_v4s8
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 41e915a4c1011..8c687d85ac24b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -207,12 +207,8 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
 ;
 ; GFX8-LABEL: abs_sgpr_v2i8:
 ; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshl_b32 s0, s0, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_ashr_i32 s0, s0, 8
-; GFX8-NEXT: s_ashr_i32 s1, s1, 8
+; GFX8-NEXT: s_sext_i32_i8 s0, s0
+; GFX8-NEXT: s_sext_i32_i8 s1, s1
 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
 ; GFX8-NEXT: s_abs_i32 s0, s0
@@ -247,13 +243,11 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
 ;
 ; GFX8-LABEL: abs_vgpr_v2i8:
 ; GFX8: ; %bb.0:
-; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX8-NEXT: v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT: v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX8-NEXT: ; return to shader part epilog
@@ -286,15 +280,9 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
 ;
 ; GFX8-LABEL: abs_sgpr_v3i8:
 ; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshl_b32 s0, s0, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_lshl_b32 s2, s2, 8
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_ashr_i32 s0, s0, 8
-; GFX8-NEXT: s_ashr_i32 s1, s1, 8
-; GFX8-NEXT: s_ashr_i32 s2, s2, 8
+; GFX8-NEXT: s_sext_i32_i8 s0, s0
+; GFX8-NEXT: s_sext_i32_i8 s1, s1
+; GFX8-NEXT: s_sext_i32_i8 s2, s2
 ; GFX8-NEXT: s_sext_i32_i16 s0, s0
 ; GFX8-NEXT: s_sext_i32_i16 s1, s1
 ; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -338,16 +326,13 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
 ;
 ; GFX8-LABEL: abs_vgpr_v3i8:
 ; GFX8: ; %bb.0:
-; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
 ; GFX8-NEXT: v_mov_b32_e32 v3, 0
-; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
-; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX8-NEXT: v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT: v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX8-NEXT: v_readfirstlane_b32 s2, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
index 8300e2542d452..e537c3a2a9438 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
@@ -511,24 +511,11 @@ define amdgpu_ps <16 x i32> @s_sext_inreg_v16i32_3(<16 x i32> inreg %value) {
 }
 
 define i16 @v_sext_inreg_i16_4(i16 %value) {
-; GFX6-LABEL: v_sext_inreg_i16_4:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 12
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_sext_inreg_i16_4:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
-; GFX8-NEXT: v_ashrrev_i16_e32 v0, 4, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_sext_inreg_i16_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 12
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_sext_inreg_i16_4:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 12
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10PLUS-LABEL: v_sext_inreg_i16_4:
 ; GFX10PLUS: ; %bb.0:
@@ -541,24 +528,11 @@ define i16 @v_sext_inreg_i16_4(i16 %value) {
 }
 
 define i16 @v_sext_inreg_i16_15(i16 %value) {
-; GFX6-LABEL: v_sext_inreg_i16_15:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_sext_inreg_i16_15:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0
-; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_sext_inreg_i16_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_sext_inreg_i16_15:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10PLUS-LABEL: v_sext_inreg_i16_15:
 ; GFX10PLUS: ; %bb.0:
@@ -643,12 +617,11 @@ define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) {
 ; GFX8-LABEL: v_sext_inreg_v2i16_8:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1
-; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
+; GFX8-NEXT: v_and_b32_sdwa v2, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_and_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_sext_inreg_v2i16_8:
@@ -680,12 +653,11 @@ define <2 x i16> @v_sext_inreg_v2i16_15(<2 x i16> %value) {
 ; GFX8-LABEL: v_sext_inreg_v2i16_15:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, 15
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
-; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v1
-; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT: v_bfe_i32 v1, v0, 0, 1
+; GFX8-NEXT: v_bfe_i32 v0, v0, 16, 1
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_sext_inreg_v2i16_15:
@@ -798,17 +770,16 @@ define <2 x float> @v_sext_inreg_v4i16_3(<4 x i16> %value) {
 ; GFX8-LABEL: v_sext_inreg_v4i16_3:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v3, 3
-; GFX8-NEXT: v_lshlrev_b16_e32 v2, 3, v0
-; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_lshlrev_b16_e32 v4, 3, v1
-; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v2
-; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v4
-; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-NEXT: v_bfe_i32 v2, v0, 0, 13
+; GFX8-NEXT: v_bfe_i32 v0, v0, 16, 13
+; GFX8-NEXT: v_bfe_i32 v3, v1, 0, 13
+; GFX8-NEXT: v_bfe_i32 v1, v1, 16, 13
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_sext_inreg_v4i16_3:
@@ -982,27 +953,26 @@ define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) {
 ; GFX8-LABEL: v_sext_inreg_v8i16_11:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v5, 11
-; GFX8-NEXT: v_lshlrev_b16_e32 v4, 11, v0
-; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_lshlrev_b16_e32 v6, 11, v1
-; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v4
-; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b16_e32 v7, 11, v2
-; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
-; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v6
-; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b16_e32 v8, 11, v3
-; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
-; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v7
-; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
-; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v8
-; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
+; GFX8-NEXT: v_bfe_i32 v4, v0, 0, 5
+; GFX8-NEXT: v_bfe_i32 v0, v0, 16, 5
+; GFX8-NEXT: v_bfe_i32 v5, v1, 0, 5
+; GFX8-NEXT: v_bfe_i32 v1, v1, 16, 5
+; GFX8-NEXT: v_bfe_i32 v6, v2, 0, 5
+; GFX8-NEXT: v_bfe_i32 v2, v2, 16, 5
+; GFX8-NEXT: v_bfe_i32 v7, v3, 0, 5
+; GFX8-NEXT: v_bfe_i32 v3, v3, 16, 5
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_sext_inreg_v8i16_11:
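To make the test churn above easier to follow, here is the shape of the change on a plain s16 case, sketched in illustrative generic MIR (the virtual register numbers are made up; the shape follows the updated legalize-sext-inreg checks). Previously, the s16 G_SEXT_INREG was lowered into a shift pair:

  %1:_(s16) = G_CONSTANT i16 8
  %2:_(s16) = G_SHL %0, %1(s16)
  %3:_(s16) = G_ASHR %2, %1(s16)

With the widening rule, it becomes an extend, a legal s32 G_SEXT_INREG, and a trunc:

  %1:_(s32) = G_ANYEXT %0(s16)
  %2:_(s32) = G_SEXT_INREG %1, 8
  %3:_(s16) = G_TRUNC %2(s32)

The s32 form then selects to single instructions such as v_bfe_i32, s_sext_i32_i8, and SDWA sext() operands, which is where the GFX8 improvements in the checks above come from.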