llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: Krzysztof Drewniak (krzysz00) <details> <summary>Changes</summary> Bitcasts preserve undef/poison status, but vector bitcasts can change which source lanes cover a demanded result lane. Map the demanded element mask through fixed-length vector bitcasts before checking the source where possible. AI note: an LLM generated the code and the test; I have read them. Co-Authored-By: OpenAI Codex <codex@<!-- -->openai.com> --- <sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub> --- Full diff: https://github.com/llvm/llvm-project/pull/200933.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+41) - (added) llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll (+102) ``````````diff diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 748520a28ffae..072a918115bbd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5654,6 +5654,47 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, case ISD::UNDEF: return !includesUndef(Kind); + case ISD::BITCAST: { + if (!DemandedElts) + return true; + + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + + if (!SrcVT.isFixedLengthVector() || !DstVT.isFixedLengthVector()) + return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1); + + unsigned SrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned DstEltBits = DstVT.getScalarSizeInBits(); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + unsigned NumDstElts = DstVT.getVectorNumElements(); + + if (SrcEltBits == DstEltBits) + return isGuaranteedNotToBeUndefOrPoison(Src, DemandedElts, Kind, + Depth + 1); + + if (SrcEltBits < DstEltBits) { 
+ if (DstEltBits % SrcEltBits != 0) + return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1); + + assert(NumSrcElts == NumDstElts * (DstEltBits / SrcEltBits) && + "Unexpected fixed-width vector bitcast"); + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); + return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, Kind, + Depth + 1); + } + + if (SrcEltBits % DstEltBits != 0) + return isGuaranteedNotToBeUndefOrPoison(Src, Kind, Depth + 1); + + assert(NumDstElts == NumSrcElts * (SrcEltBits / DstEltBits) && + "Unexpected fixed-width vector bitcast"); + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); + return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, Kind, + Depth + 1); + } + case ISD::BUILD_VECTOR: // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - // this shouldn't affect the result. diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll new file mode 100644 index 0000000000000..27527cfd9eeff --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-freeze-bitcast-demanded-elts.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \ +; RUN: --check-prefix=COMBINE \ +; RUN: --implicit-check-not=V_ADD_U64_PSEUDO --implicit-check-not=REG_SEQUENCE +; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \ +; RUN: -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s \ +; RUN: | FileCheck %s --check-prefix=NOCOMBINE + +declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg) + +define <4 x i32> @freeze_extract_bitcast_demanded(<2 x i64> %a, <2 x i64> %b) nounwind { + ; COMBINE-LABEL: name: freeze_extract_bitcast_demanded + ; 
COMBINE: bb.0 (%ir-block.0): + ; COMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; COMBINE-NEXT: {{ $}} + ; COMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; COMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; COMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; COMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; COMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY3]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; COMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; COMBINE-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + ; COMBINE-NEXT: $vgpr1 = COPY [[V_LSHRREV_B32_e64_1]] + ; COMBINE-NEXT: $vgpr2 = COPY [[V_LSHRREV_B32_e64_2]] + ; COMBINE-NEXT: $vgpr3 = COPY [[V_LSHRREV_B32_e64_3]] + ; COMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; NOCOMBINE-LABEL: name: freeze_extract_bitcast_demanded + ; NOCOMBINE: bb.0 (%ir-block.0): + ; NOCOMBINE-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; NOCOMBINE-NEXT: {{ $}} + ; NOCOMBINE-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; NOCOMBINE-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; NOCOMBINE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; NOCOMBINE-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; NOCOMBINE-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; NOCOMBINE-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; NOCOMBINE-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; NOCOMBINE-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, 
[[COPY4]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[COPY8:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub1 + ; NOCOMBINE-NEXT: [[COPY9:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[COPY10:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub1 + ; NOCOMBINE-NEXT: [[COPY11:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; NOCOMBINE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[V_ADD_U:%[0-9]+]]:vreg_64_align2 = nsw V_ADD_U64_PSEUDO killed [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], implicit-def dead $vcc, implicit $exec + ; NOCOMBINE-NEXT: [[COPY12:%[0-9]+]]:av_32 = COPY [[V_ADD_U]].sub1 + ; NOCOMBINE-NEXT: [[COPY13:%[0-9]+]]:av_32 = COPY [[V_ADD_U]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; NOCOMBINE-NEXT: [[V_ADD_U1:%[0-9]+]]:vreg_64_align2 = nsw V_ADD_U64_PSEUDO killed [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], implicit-def dead $vcc, implicit $exec + ; NOCOMBINE-NEXT: [[COPY14:%[0-9]+]]:av_32 = COPY [[V_ADD_U1]].sub1 + ; NOCOMBINE-NEXT: [[COPY15:%[0-9]+]]:av_32 = COPY [[V_ADD_U1]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:av_256_align2 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3, killed [[COPY15]], %subreg.sub4, killed [[COPY14]], %subreg.sub5, killed [[COPY13]], %subreg.sub6, killed [[COPY12]], %subreg.sub7 + ; NOCOMBINE-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub7 + ; 
NOCOMBINE-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY16]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub6 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY17]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub5 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY18]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub4 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY19]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub3 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_4:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY20]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub2 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_5:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY21]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub1 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_6:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY22]], implicit $exec + ; NOCOMBINE-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub0 + ; NOCOMBINE-NEXT: [[V_LSHRREV_B32_e64_7:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_2]], killed [[COPY23]], implicit $exec + ; NOCOMBINE-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_256_align2 = REG_SEQUENCE killed [[V_LSHRREV_B32_e64_7]], %subreg.sub0, killed [[V_LSHRREV_B32_e64_6]], %subreg.sub1, killed [[V_LSHRREV_B32_e64_5]], %subreg.sub2, killed [[V_LSHRREV_B32_e64_4]], %subreg.sub3, killed [[V_LSHRREV_B32_e64_3]], %subreg.sub4, killed [[V_LSHRREV_B32_e64_2]], 
%subreg.sub5, killed [[V_LSHRREV_B32_e64_1]], %subreg.sub6, killed [[V_LSHRREV_B32_e64_]], %subreg.sub7 + ; NOCOMBINE-NEXT: [[COPY24:%[0-9]+]]:av_256_align2 = COPY killed [[REG_SEQUENCE6]] + ; NOCOMBINE-NEXT: [[COPY25:%[0-9]+]]:av_32 = COPY [[COPY24]].sub3 + ; NOCOMBINE-NEXT: [[COPY26:%[0-9]+]]:av_32 = COPY [[COPY24]].sub2 + ; NOCOMBINE-NEXT: [[COPY27:%[0-9]+]]:av_32 = COPY [[COPY24]].sub1 + ; NOCOMBINE-NEXT: [[COPY28:%[0-9]+]]:av_32 = COPY [[COPY24]].sub0 + ; NOCOMBINE-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:av_128_align2 = REG_SEQUENCE killed [[COPY28]], %subreg.sub0, killed [[COPY27]], %subreg.sub1, killed [[COPY26]], %subreg.sub2, killed [[COPY25]], %subreg.sub3 + ; NOCOMBINE-NEXT: [[COPY29:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub0 + ; NOCOMBINE-NEXT: [[COPY30:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub1 + ; NOCOMBINE-NEXT: [[COPY31:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub2 + ; NOCOMBINE-NEXT: [[COPY32:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE7]].sub3 + ; NOCOMBINE-NEXT: $vgpr0 = COPY [[COPY29]] + ; NOCOMBINE-NEXT: $vgpr1 = COPY [[COPY30]] + ; NOCOMBINE-NEXT: $vgpr2 = COPY [[COPY31]] + ; NOCOMBINE-NEXT: $vgpr3 = COPY [[COPY32]] + ; NOCOMBINE-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %poisonable = add nsw <2 x i64> %b, <i64 9223372036854775807, i64 9223372036854775807> + %wide = shufflevector <2 x i64> %a, <2 x i64> %poisonable, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %bc = bitcast <4 x i64> %wide to <8 x i32> + %shifted = lshr <8 x i32> %bc, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %fr = freeze <8 x i32> %shifted + %ext = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %fr, i64 0) + ret <4 x i32> %ext +} `````````` </details> https://github.com/llvm/llvm-project/pull/200933 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
