Author: Anton Afanasyev Date: 2022-03-01T14:29:22-08:00 New Revision: da33d400682a8cf93062fe61a9f0b6ec1d60c8ad
URL: https://github.com/llvm/llvm-project/commit/da33d400682a8cf93062fe61a9f0b6ec1d60c8ad DIFF: https://github.com/llvm/llvm-project/commit/da33d400682a8cf93062fe61a9f0b6ec1d60c8ad.diff LOG: [SLP] Don't try to vectorize pair with insertelement In particular, doing so breaks vectorization of insertelements where some of the intermediate (i.e. not last) insertelements are used externally. Fixes PR52275 Fixes #51617 Reviewed by: ABataev Differential Revision: https://reviews.llvm.org/D119679 (cherry picked from commit b7574b0) Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9eafd94efea28..644372483edde 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3849,13 +3849,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, ValueSet SourceVectors; for (Value *V : VL) { SourceVectors.insert(cast<Instruction>(V)->getOperand(0)); - if (getInsertIndex(V) == None) { - LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with " - "non-constant or undef index.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); - BS.cancelScheduling(VL, VL0); - return; - } + assert(getInsertIndex(V) != None && "Non-constant or undef index?"); } if (count_if(VL, [&SourceVectors](Value *V) { @@ -8343,6 +8337,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; + if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B)) + return false; Value *VL[] = {A, B}; return tryToVectorizeList(VL, R); } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll index 
6794553ffd6d0..9205ef0b375d1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll @@ -1,16 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s -define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) { -; CHECK-LABEL: @pr52275( -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> -; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> -; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]] -; CHECK-NEXT: ret <4 x i8> [[V2]] -; +define <4 x i8> @test(<4 x i8> %v, i8* %x) { %x0 = load i8, i8* %x, align 4 %g1 = getelementptr inbounds i8, i8* %x, i64 1 %x1 = load i8, i8* %g1, align 4 @@ -19,3 +10,139 @@ define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) { %v2 = add <4 x i8> %v0, %v1 ret <4 x i8> %v2 } + +define <2 x i8> @test2(<2 x i8> %t6, i32* %t1) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0 +; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1 +; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]] +; CHECK-NEXT: ret <2 x i8> [[T11]] +; +; FORCE_SLP-LABEL: @test2( +; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* 
[[T1]], i64 1 +; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0 +; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1 +; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]] +; FORCE_SLP-NEXT: ret <2 x i8> [[T11]] +; + %t3 = load i32, i32* %t1, align 4 + %t4 = getelementptr inbounds i32, i32* %t1, i64 1 + %t5 = load i32, i32* %t4, align 4 + %t7 = trunc i32 %t3 to i8 + %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0 + %t9 = trunc i32 %t5 to i8 + %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1 + %t11 = add <2 x i8> %t10, %t8 + ret <2 x i8> %t11 +} + +define <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) { +; CHECK-LABEL: @test_reorder( +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1 +; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0 +; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]] +; CHECK-NEXT: ret <2 x i8> [[T11]] +; +; FORCE_SLP-LABEL: @test_reorder( +; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1 +; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0 +; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x 
i8> [[T10]], [[T8]] +; FORCE_SLP-NEXT: ret <2 x i8> [[T11]] +; + %t3 = load i32, i32* %t1, align 4 + %t4 = getelementptr inbounds i32, i32* %t1, i64 1 + %t5 = load i32, i32* %t4, align 4 + %t7 = trunc i32 %t3 to i8 + %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1 + %t9 = trunc i32 %t5 to i8 + %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0 + %t11 = add <2 x i8> %t10, %t8 + ret <2 x i8> %t11 +} + +define <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) { +; CHECK-LABEL: @test_subvector( +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0 +; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1 +; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]] +; CHECK-NEXT: ret <4 x i8> [[T11]] +; +; FORCE_SLP-LABEL: @test_subvector( +; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0 +; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1 +; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]] +; FORCE_SLP-NEXT: ret <4 x i8> [[T11]] +; + %t3 = load i32, i32* %t1, align 4 + %t4 = getelementptr inbounds i32, i32* %t1, i64 1 + %t5 = load i32, i32* %t4, align 4 + %t7 = trunc i32 %t3 to i8 + %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0 + %t9 = trunc i32 %t5 to i8 + %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1 + %t11 = add <4 x i8> %t10, %t8 + ret <4 x i8> 
%t11 +} + +define <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) { +; CHECK-LABEL: @test_subvector_reorder( +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3 +; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2 +; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]] +; CHECK-NEXT: ret <4 x i8> [[T11]] +; +; FORCE_SLP-LABEL: @test_subvector_reorder( +; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4 +; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1 +; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4 +; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8 +; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3 +; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8 +; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2 +; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]] +; FORCE_SLP-NEXT: ret <4 x i8> [[T11]] +; + %t3 = load i32, i32* %t1, align 4 + %t4 = getelementptr inbounds i32, i32* %t1, i64 1 + %t5 = load i32, i32* %t4, align 4 + %t7 = trunc i32 %t3 to i8 + %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3 + %t9 = trunc i32 %t5 to i8 + %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2 + %t11 = add <4 x i8> %t10, %t8 + ret <4 x i8> %t11 +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
