================
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mcpu=neoverse-v2 -passes=loop-vectorize -mtriple=aarch64 < %s | FileCheck %s
+target triple = "aarch64"
+
+; Check that a partial reduction is reverted back to a regular reduction,
+; so that we compare "the VPlan with the best kind of reduction for <range>"
+; vs "the VPlan with the best kind of reduction for <other range>".
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable vscale_range(1,16)
+define dso_local i64 @foo(ptr noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local i64 @foo(
+; CHECK-SAME: ptr noundef readonly captures(none) [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label %[[ITER_CHECK:.*]], label %[[BB27:.*]]
+; CHECK: [[ITER_CHECK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP1]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 12
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[WIDE_LOAD5]] to <4 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[WIDE_LOAD6]] to <4 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i32> [[WIDE_LOAD7]] to <4 x i64>
+; CHECK-NEXT: [[TMP13]] = add <4 x i64> [[VEC_PHI]], [[TMP9]]
+; CHECK-NEXT: [[TMP14]] = add <4 x i64> [[VEC_PHI2]], [[TMP10]]
+; CHECK-NEXT: [[TMP15]] = add <4 x i64> [[VEC_PHI3]], [[TMP11]]
+; CHECK-NEXT: [[TMP16]] = add <4 x i64> [[VEC_PHI4]], [[TMP12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
----------------
sushgokh wrote:
You don't need to check anything after the middle block. You can use something like '--filter-out-after' when generating the test:
https://github.com/llvm/llvm-project/pull/166138
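
For example, something like this (a sketch, not the exact invocation: it assumes the '--filter-out-after' option from the linked PR takes a regex matched against the tool output, where the block label is spelled "middle.block"; the test path is a placeholder):

  $ llvm/utils/update_test_checks.py --filter-out-after "middle.block" path/to/your-test.ll

The filter should then be recorded in the test's "UTC_ARGS:" note, so later regenerations keep dropping every CHECK line from the middle block onward.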