https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/181292
>From b99ff35e3648564d0470a80e36321bf7d313580f Mon Sep 17 00:00:00 2001 From: Aiden Grossman <[email protected]> Date: Fri, 13 Feb 2026 02:03:03 +0000 Subject: [PATCH 1/2] tests Created using spr 1.3.7 --- .../LowerMatrixIntrinsics/multiply-fused.ll | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll index 430358f0a5138..c3a81b79f6de5 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart ; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=9999 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s ; REQUIRES: aarch64-registered-target @@ -8,18 +8,18 @@ target triple = "aarch64-apple-ios" ; Test tiling without generating explicit loops. -define void @multiply(ptr %A, ptr %B, ptr %C) { +define void @multiply(ptr %A, ptr %B, ptr %C) !prof !0 { ; CHECK-LABEL: @multiply( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint ptr [[C:%.*]] to i64 ; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 128 ; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint ptr [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] -; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: alias_cont: ; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 128 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] -; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] +; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]], !prof [[PROF1]] ; CHECK: copy: ; CHECK-NEXT: [[TMP2:%.*]] = alloca [16 x double], align 8 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP2]], ptr noundef nonnull align 8 dereferenceable(128) [[A]], i64 128, i1 false) @@ -30,11 +30,11 @@ define void @multiply(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 128 ; CHECK-NEXT: [[LOAD_BEGIN6:%.*]] = ptrtoint ptr [[B:%.*]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]] -; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]] +; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]], !prof [[PROF1]] ; CHECK: alias_cont1: ; CHECK-NEXT: [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 128 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]] -; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]] +; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]], !prof [[PROF1]] ; CHECK: copy2: ; CHECK-NEXT: [[TMP6:%.*]] = alloca [16 x double], align 8 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP6]], ptr noundef nonnull align 8 dereferenceable(128) [[B]], i64 128, i1 false) @@ -375,3 +375,8 @@ entry: } declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32) + +!0 = !{!"function_entry_count", i64 1000} +;. +; CHECK: [[PROF1]] = !{!"unknown", !"lower-matrix-intrinsics"} +;. >From 61e144b9f0add7f71edc60f7d6889516fd2b4150 Mon Sep 17 00:00:00 2001 From: Aiden Grossman <[email protected]> Date: Fri, 13 Feb 2026 03:17:34 +0000 Subject: [PATCH 2/2] fix test, feedback Created using spr 1.3.7 --- llvm/lib/Transforms/Utils/MatrixUtils.cpp | 3 +++ .../multiply-fused-loops.ll | 20 +++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp index e84522276219c..262e4c99365d7 100644 --- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp +++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp @@ -51,6 +51,9 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); auto *BR = BranchInst::Create(Header, Exit, Cond, Latch); if (!ProfcheckDisableMetadataFixes) { + assert(Step->getZExtValue() != 0 && + "Expected a non-zero step size. A step size of zero produces an " + "infinite loop which massively skews profile data."); MDBuilder MDB(Preheader->getContext()); setFittedBranchWeights( *BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false); diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll index 8c6d142550abd..498102f70c42b 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart ; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=0 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s ; REQUIRES: aarch64-registered-target @@ -360,10 +360,14 @@ entry: } declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) - -; CHECK: !0 = distinct !{!0, !1} -; CHECK-NEXT: !1 = !{!"llvm.loop.unroll.count", i32 2} -; CHECK-NEXT: !2 = distinct !{!2, !1} -; CHECK-NEXT: !3 = distinct !{!3, !4} -; CHECK-NEXT: !4 = !{!"llvm.loop.unroll.count", i32 1} -; CHECK-NEXT: !5 = distinct !{!5, !4} +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 2} +; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 2} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]} +; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]]} +; CHECK: [[META6]] = !{!"llvm.loop.unroll.count", i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 1, i32 4} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]]} +;. _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
