Author: Ricardo Jesus Date: 2025-05-09T13:31:07-07:00 New Revision: 70eed33971d9b83fe81d837588dba64a6413b015
URL: https://github.com/llvm/llvm-project/commit/70eed33971d9b83fe81d837588dba64a6413b015 DIFF: https://github.com/llvm/llvm-project/commit/70eed33971d9b83fe81d837588dba64a6413b015.diff LOG: [InstCombine] Do not combine shuffle+bitcast if the bitcast is eliminable. (#135769) If we are attempting to combine shuffle+bitcast but the bitcast is pairable with a subsequent bitcast, we should not fold the shuffle as doing so can block further simplifications. The motivation for this is a long-standing regression affecting SIMDe on AArch64, introduced indirectly by the AlwaysInliner (1a2e77cf). Some reproducers: * https://godbolt.org/z/53qx18s6M * https://godbolt.org/z/o5e43h5M7 (cherry picked from commit c91c3f930cfc75eb4e8b623ecd59c807863aa6c0) Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6860a7cd07b78..118d2d4be828f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -3029,10 +3029,18 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector<BitCastInst *, 8> BCs; DenseMap<Type *, Value *> NewBCs; for (User *U : SVI.users()) - if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) - if (!BC->use_empty()) - // Only visit bitcasts that weren't previously handled. - BCs.push_back(BC); + if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) { + // Only visit bitcasts that weren't previously handled. + if (BC->use_empty()) + continue; + // Prefer to combine bitcasts of bitcasts before attempting this fold. 
+ if (BC->hasOneUse()) { + auto *BC2 = dyn_cast<BitCastInst>(BC->user_back()); + if (BC2 && isEliminableCastPair(BC, BC2)) + continue; + } + BCs.push_back(BC); + } for (BitCastInst *BC : BCs) { unsigned BegIdx = Mask.front(); Type *TgtTy = BC->getDestTy(); diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index f20077243273c..877dd1eefbae4 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -235,3 +235,38 @@ define <3 x i4> @shuf_bitcast_wrong_size(<2 x i8> %v, i8 %x) { %r = shufflevector <4 x i4> %b, <4 x i4> undef, <3 x i32> <i32 0, i32 1, i32 2> ret <3 x i4> %r } + +; Negative test - chain of bitcasts. + +define <16 x i8> @shuf_bitcast_chain(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain( +; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[C:%.*]] = bitcast <4 x i32> [[S]] to <16 x i8> +; CHECK-NEXT: ret <16 x i8> [[C]] +; + %s = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %a = bitcast <4 x i32> %s to <2 x i64> + %b = bitcast <2 x i64> %a to i128 + %c = bitcast i128 %b to <16 x i8> + ret <16 x i8> %c +} + +; Same as above, but showing why it's not feasible to implement the reverse +; fold in VectorCombine (see #136998). 
+ +define <4 x i32> @shuf_bitcast_chain_2(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain_2( +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[S0]], [[S1]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %s0 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %s1 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %b0 = bitcast <4 x i32> %s0 to i128 + %b1 = bitcast <4 x i32> %s1 to i128 + %c0 = bitcast i128 %b0 to <4 x i32> + %c1 = bitcast i128 %b1 to <4 x i32> + %r = or <4 x i32> %c0, %c1 + ret <4 x i32> %r +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits