llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Graham Hunter (huntergr-arm)

<details>
<summary>Changes</summary>

If we have vectorized variants of a function which take linear parameters, we should be able to vectorize assuming the strides match.

---

Patch is 26.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73941.diff

2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+24)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll (+228-19)


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
         ParamsOk = false;
         break;
       }
+      case VFParamKind::OMP_Linear: {
+        Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+        // Find the stride for the scalar parameter in this loop and see if
+        // it matches the stride for the variant.
+        // TODO: do we need to figure out the cost of an extract to get the
+        // first lane? Or do we hope that it will be folded away?
+        ScalarEvolution *SE = PSE.getSE();
+        const auto *SAR =
+            dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+        if (!SAR || SAR->getLoop() != TheLoop) {
+          ParamsOk = false;
+          break;
+        }
+
+        const SCEVConstant *Step =
+            dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+        if (!Step ||
+            Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+          ParamsOk = false;
+
+        break;
+      }
       case VFParamKind::GlobalPredicate:
         UsesMask = true;
         break;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask(ptr [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
 ;
@@ -43,16 +77,50 @@ define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
 ; CHECK-LABEL: define void @test_linear_with_mask
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_linear(ptr [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
 ;
@@ -77,18 +145,54 @@ define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readon
 ; CHECK-LABEL: define void @test_linear_with_vector
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 4 x ptr> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @baz_vector_and_linear(<vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[DATA:%.*]] = load i32, ptr [[GEPC]], align 8
 ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @baz(i32 [[DATA]], ptr [[GEPB]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @baz(i32 [[DATA]], ptr [[GEPB]]) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
 ;
@@ -119,7 +223,7 @@ define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -149,17 +253,52 @@ define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0
 ; CHECK-LABEL: define void @test_linear_wide_stride
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[B]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x ptr> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_linear_nomask_2x(ptr [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[DOUBLE:%.*]] = mul i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[DOUBLE]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR5]]
 ; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
 ;
@@ -185,17 +324,53 @@ define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readon
 ; CHECK-LABEL: define void @test_linear_mixed_types
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHEC... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/73941

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
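For context on what the summary's "vectorized variants of a function which take linear parameters" refers to, here is a minimal sketch in C using OpenMP `declare simd`; the function names are illustrative only and not taken from the patch or the test file. The `linear(p)` clause declares that the pointer argument advances by a fixed stride from one lane of a vectorized call to the next, which is the property the new SCEV-based check in `setVectorizedCallDecision` verifies for the scalar argument before selecting such a variant.

```c
// Illustrative only: a scalar function with a vector variant in which the
// pointer parameter is linear (lane i of a vectorized call receives p + i,
// i.e. a fixed one-element stride across lanes).
#pragma omp declare simd linear(p) notinbranch
long foo(long *p);

void caller(long *restrict a, long *b, long n) {
  for (long i = 0; i < n; ++i)
    a[i] = foo(&b[i]); // &b[i] is an add recurrence with a constant step,
                       // so it can match the stride declared by the variant.
}
```

With the patch, a call is only mapped onto such a variant when the scalar argument's SCEV step is a constant matching the stride recorded for the variant (`Param.LinearStepOrPos`); the `test_linear_bad_stride` function in the test file above exercises the mismatching case.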