https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/174490
>From be752cf0d31a136b586761177c6d032665387407 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <[email protected]>
Date: Mon, 15 Dec 2025 17:25:57 -0800
Subject: [PATCH] Memset

---
 .../Transforms/Utils/LowerMemIntrinsics.h     |   6 +-
 .../Transforms/Utils/LowerMemIntrinsics.cpp   | 144 +++++++++++++-----
 .../X86/memcpy-inline-non-constant-len.ll     |  49 +++---
 .../X86/memset-inline-non-constant-len.ll     |  28 +++-
 .../X86/memset-pattern.ll                     |  20 +--
 5 files changed, 173 insertions(+), 74 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d4e72a60fc1ea..1ec150330484d 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -38,7 +38,8 @@ LLVM_ABI void createMemCpyLoopUnknownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
     Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile,
     bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<unsigned> AtomicSize = std::nullopt);
+    std::optional<unsigned> AtomicSize = std::nullopt,
+    std::optional<uint64_t> AverageTripCount = std::nullopt);
 
 /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
 /// compile time constant. Loop is inserted at \p InsertBefore.
@@ -46,7 +47,8 @@ LLVM_ABI void createMemCpyLoopKnownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
     ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile,
     bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicCpySize = std::nullopt);
+    std::optional<uint32_t> AtomicCpySize = std::nullopt,
+    std::optional<uint64_t> AverageTripCount = std::nullopt);
 
 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
 LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy,
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 4ab99edd64baa..531727910477e 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -12,15 +12,23 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <limits>
 #include <optional>
 
 #define DEBUG_TYPE "lower-mem-intrinsics"
 
 using namespace llvm;
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
 /// \returns \p Len urem \p OpSize, checking for optimization opportunities.
 /// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
 static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
@@ -60,6 +68,24 @@ struct LoopExpansionInfo {
   /// required.
   Value *ResidualLoopIndex = nullptr;
 };
+
+std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
+  if (ProfcheckDisableMetadataFixes)
+    return std::nullopt;
+  if (const auto Len = I.getLengthInBytes())
+    return Len->getZExtValue();
+  uint64_t Total = 0;
+  auto ProfData =
+      getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
+                               std::numeric_limits<uint32_t>::max(), Total);
+  if (!Total)
+    return std::nullopt;
+  uint64_t TripCount = 0;
+  for (const auto &KV : ProfData)
+    TripCount += KV.Count * KV.Value;
+  return std::round(1.0 * TripCount / Total);
+}
+
 } // namespace
 
 /// Insert the control flow and loop counters for a memcpy/memset loop
@@ -94,10 +120,11 @@ struct LoopExpansionInfo {
 /// to \p MainLoopStep.
 /// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
 /// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
-static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
-                                             Value *Len, unsigned MainLoopStep,
-                                             unsigned ResidualLoopStep,
-                                             StringRef BBNamePrefix) {
+static LoopExpansionInfo
+insertLoopExpansion(Instruction *InsertBefore, Value *Len,
+                    unsigned MainLoopStep, unsigned ResidualLoopStep,
+                    StringRef BBNamePrefix,
+                    std::optional<uint64_t> AverageTripCount) {
   assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
          "ResidualLoopStep must divide MainLoopStep if specified");
   assert(ResidualLoopStep <= MainLoopStep &&
@@ -175,9 +202,18 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
   ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
   if (MustTakeMainLoop)
     PreLoopBuilder.CreateBr(MainLoopBB);
-  else
-    PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
-                                MainLoopBB, ResidualCondBB);
+  else {
+    auto *BR = PreLoopBuilder.CreateCondBr(
+        PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
+        ResidualCondBB);
+    if (AverageTripCount.has_value()) {
+      MDBuilder MDB(ParentFunc->getContext());
+      setFittedBranchWeights(
+          *BR, {AverageTripCount.value() % MainLoopStep, 1}, false);
+    } else {
+      setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+    }
+  }
   PreLoopBB->getTerminator()->eraseFromParent();
 
   // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
@@ -222,22 +258,32 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
     PreLoopBuilder.CreateBr(MainLoopBB);
   } else {
     ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
+    MDBuilder B(ParentFunc->getContext());
     PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
-                                MainLoopBB, PostLoopBB);
+                                MainLoopBB, PostLoopBB,
+                                B.createLikelyBranchWeights());
   }
   PreLoopBB->getTerminator()->eraseFromParent();
   // Stay in the MainLoop until we have handled all the LoopUnits.
-  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
-                           MainLoopBB, PostLoopBB);
+  auto *Br = LoopBuilder.CreateCondBr(
+      LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
+  if (AverageTripCount.has_value())
+    setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
+                           /*IsExpected=*/false);
+  else
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
   }
 
   return LEI;
 }
 
-void llvm::createMemCpyLoopKnownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
-    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
-    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+                                     Value *DstAddr, ConstantInt *CopyLen,
+                                     Align SrcAlign, Align DstAlign,
+                                     bool SrcIsVolatile, bool DstIsVolatile,
+                                     bool CanOverlap,
+                                     const TargetTransformInfo &TTI,
+                                     std::optional<uint32_t> AtomicElementSize,
+                                     std::optional<uint64_t> AverageTripCount) {
   // No need to expand zero length copies.
   if (CopyLen->isZero())
     return;
@@ -269,8 +315,9 @@ void llvm::createMemCpyLoopKnownSize(
 
   // Skip the loop expansion entirely if the loop would never be taken.
   if (LoopEndCount != 0) {
-    LoopExpansionInfo LEI = insertLoopExpansion(InsertBefore, CopyLen,
-                                                LoopOpSize, 0, "static-memcpy");
+    LoopExpansionInfo LEI =
+        insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
+                            "static-memcpy", AverageTripCount);
 
     // Fill MainLoopBB
     IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -357,7 +404,8 @@ void llvm::createMemCpyLoopUnknownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
     Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
     bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+    std::optional<uint32_t> AtomicElementSize,
+    std::optional<uint64_t> AverageTripCount) {
   BasicBlock *PreLoopBB = InsertBefore->getParent();
   Function *ParentFunc = PreLoopBB->getParent();
   const DataLayout &DL = ParentFunc->getDataLayout();
@@ -387,8 +435,9 @@ void llvm::createMemCpyLoopUnknownSize(
   assert(ResidualLoopOpSize ==
              (AtomicElementSize ? *AtomicElementSize : 1) &&
          "Store size is expected to match type size");
-  LoopExpansionInfo LEI = insertLoopExpansion(
-      InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize, "dynamic-memcpy");
+  LoopExpansionInfo LEI =
+      insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
+                          "dynamic-memcpy", AverageTripCount);
 
   // Fill MainLoopBB
   IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -931,6 +980,7 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
 
 static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                              Value *CopyLen, Value *SetValue, Align DstAlign,
+                             std::optional<uint64_t> AverageTripCount,
                              bool IsVolatile) {
   Type *TypeOfCopyLen = CopyLen->getType();
   BasicBlock *OrigBB = InsertBefore->getParent();
@@ -943,9 +993,16 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
 
   IRBuilder<> Builder(OrigBB->getTerminator());
 
-  Builder.CreateCondBr(
+  auto *ToLoopBR = Builder.CreateCondBr(
       Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
       LoopBB);
+  MDBuilder MDB(F->getContext());
+  if (AverageTripCount.has_value())
+    ToLoopBR->setMetadata(LLVMContext::MD_prof,
+                          MDB.createLikelyBranchWeights());
+  else
+    setExplicitlyUnknownBranchWeightsIfProfiled(*ToLoopBR, DEBUG_TYPE);
+
   OrigBB->getTerminator()->eraseFromParent();
 
   unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
@@ -964,8 +1021,13 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
   LoopIndex->addIncoming(NewIndex, LoopBB);
 
-  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
-                           NewBB);
+  auto *LoopBR = LoopBuilder.CreateCondBr(
+      LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
+  if (AverageTripCount.has_value())
+    setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
+                           /*IsExpected=*/false);
+  else
+    setExplicitlyUnknownBranchWeightsIfProfiled(*LoopBR, DEBUG_TYPE);
 }
 
 template <typename T>
@@ -983,6 +1045,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                               const TargetTransformInfo &TTI,
                               ScalarEvolution *SE) {
   bool CanOverlap = canOverlap(Memcpy, SE);
+  auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
     createMemCpyLoopKnownSize(
         /* InsertBefore */ Memcpy,
@@ -994,7 +1057,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
         /* CanOverlap */ CanOverlap,
-        /* TargetTransformInfo */ TTI);
+        /* TargetTransformInfo */ TTI,
+        /* AtomicElementSize */ std::nullopt,
+        /* AverageTripCount */ TripCount);
   } else {
     createMemCpyLoopUnknownSize(
         /* InsertBefore */ Memcpy,
@@ -1006,7 +1071,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
         /* CanOverlap */ CanOverlap,
-        /* TargetTransformInfo */ TTI);
+        /* TargetTransformInfo */ TTI,
+        /* AtomicElementSize */ std::nullopt,
+        /* AverageTripCount */ TripCount);
   }
 }
 
@@ -1027,16 +1094,17 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
   if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
     // We may not be able to emit a pointer comparison, but we don't have
     // to. Expand as memcpy.
+    auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
     if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
-      createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
-                                CI, SrcAlign, DstAlign, SrcIsVolatile,
-                                DstIsVolatile,
-                                /*CanOverlap=*/false, TTI);
+      createMemCpyLoopKnownSize(
+          /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
+          SrcIsVolatile, DstIsVolatile,
+          /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
     } else {
-      createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
-                                  CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
-                                  DstIsVolatile,
-                                  /*CanOverlap=*/false, TTI);
+      createMemCpyLoopUnknownSize(
+          /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
+          DstAlign, SrcIsVolatile, DstIsVolatile,
+          /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
     }
 
     return true;
@@ -1072,7 +1140,8 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
                    /* CopyLen */ Memset->getLength(),
                    /* SetValue */ Memset->getValue(),
                    /* Alignment */ Memset->getDestAlign().valueOrOne(),
-                   Memset->isVolatile());
+                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+                   /* IsVolatile */ Memset->isVolatile());
 }
 
 void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
@@ -1081,7 +1150,8 @@ void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
                    /* CopyLen=*/Memset->getLength(),
                    /* SetValue=*/Memset->getValue(),
                    /* Alignment=*/Memset->getDestAlign().valueOrOne(),
-                   Memset->isVolatile());
+                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+                   /* IsVolatile */ Memset->isVolatile());
 }
 
 void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
@@ -1100,7 +1170,7 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
         /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
         /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
         /* TargetTransformInfo */ TTI,
-        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
   } else {
     createMemCpyLoopUnknownSize(
         /* InsertBefore */ AtomicMemcpy,
@@ -1113,6 +1183,6 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
         /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
         /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
         /* TargetTransformInfo */ TTI,
-        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
   }
 }
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
index a4e049941030e..0ca0bb2421c8b 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
@@ -1,49 +1,62 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
 
 ; Constant length memcpy.inline should be left unmodified.
-define void @memcpy_32(ptr %dst, ptr %src) nounwind {
+define void @memcpy_32(ptr %dst, ptr %src) nounwind !prof !0 {
 ; CHECK-LABEL: define void @memcpy_32(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false)
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false), !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    tail call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 true)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0)
+  call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0), !prof !1
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 1)
   ret void
 }
 
-define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind {
+define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind !prof !0 {
 ; CHECK-LABEL: define void @memcpy_x(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_MEMCPY_EXPANSION:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION:.*]]
-; CHECK:       [[LOOP_MEMCPY_EXPANSION]]:
-; CHECK-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOOP_MEMCPY_EXPANSION]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]]:
+; CHECK-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX]]
 ; CHECK-NEXT:    store i8 [[TMP3]], ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP5]] = add i64 [[LOOP_INDEX]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[LOOP_MEMCPY_EXPANSION]], label %[[POST_LOOP_MEMCPY_EXPANSION]]
-; CHECK:       [[POST_LOOP_MEMCPY_EXPANSION]]:
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_POST_EXPANSION]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[LOOP_MEMCPY_EXPANSION2:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION1:.*]]
-; CHECK:       [[LOOP_MEMCPY_EXPANSION2]]:
-; CHECK-NEXT:    [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP11:%.*]], %[[LOOP_MEMCPY_EXPANSION2]] ]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1:.*]], !prof [[PROF2]]
+; CHECK:       [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]]:
+; CHECK-NEXT:    [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[DYNAMIC_MEMCPY_POST_EXPANSION]] ], [ [[TMP11:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load volatile i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX3]]
 ; CHECK-NEXT:    store volatile i8 [[TMP9]], ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[TMP11]] = add i64 [[LOOP_INDEX3]], 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i64 [[TMP11]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[LOOP_MEMCPY_EXPANSION2]], label %[[POST_LOOP_MEMCPY_EXPANSION1]]
-; CHECK:       [[POST_LOOP_MEMCPY_EXPANSION1]]:
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1]], !prof [[PROF4:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_POST_EXPANSION1]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 0)
-  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1)
+  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1), !prof !1
   ret void
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 3, i32 1}
+;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
index 0843b1532f843..b376e27fdaf1c 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
 
 ; Constant length memset.inline should be left unmodified.
@@ -14,32 +14,44 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
   ret void
 }
 
-define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind {
+define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind !prof !0 {
 ; CHECK-LABEL: define void @memset_x(
-; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       [[LOADSTORELOOP]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    store i8 [[VALUE]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       [[SPLIT]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       [[LOADSTORELOOP2]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
 ; CHECK-NEXT:    store volatile i8 [[VALUE]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP9]] = add i64 [[TMP7]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]], !prof [[PROF3]]
 ; CHECK:       [[SPLIT1]]:
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0)
+  call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0), !prof !1
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 1)
   ret void
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 1a1fe20350885..f01492f9ceacc 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -12,14 +12,14 @@
 define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
 ; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]], !prof [[PROF0:![0-9]+]]
 ; CHECK:       [[LOADSTORELOOP]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    store i128 [[VALUE]], ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       [[SPLIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -40,14 +40,14 @@ define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
 define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_nz_as(
 ; CHECK-SAME: ptr addrspace(1) [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]], !prof [[PROF0]]
 ; CHECK:       [[LOADSTORELOOP]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    store i128 -113427455635030943652277463699152839203, ptr addrspace(1) [[TMP2]], align 1
 ; CHECK-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
-; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]], !prof [[PROF1]]
 ; CHECK:       [[SPLIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -131,7 +131,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
 define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa(
 ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[DOUBLE_TBAA0:![0-9]+]]
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[DOUBLE_TBAA2:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5
@@ -216,8 +216,10 @@ define void @memset_pattern_i64_x_fromnonconstptr(ptr %a, i64 %x, ptr %p) nounwi
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
-; CHECK: [[DOUBLE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
-; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
-; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
-; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[DOUBLE_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+; CHECK: [[META3]] = !{!"double", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C++ TBAA"}
 ;.
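
A note on where the new weights come from, for anyone skimming the tests: getAverageMemOpLoopTripCount() takes the constant length when there is one, and otherwise computes a count-weighted, rounded average of the IPVK_MemOPSize value-profile entries attached to the intrinsic; insertLoopExpansion() and createMemSetLoop() then fit that average into branch_weights on the expansion branches. Below is a minimal standalone sketch of just that averaging arithmetic, fed with the "VP" metadata used in the tests above (plain C++ for illustration; averageTripCount is a made-up helper, not an LLVM API):

  #include <cmath>
  #include <cstdint>
  #include <cstdio>
  #include <utility>
  #include <vector>

  // Count-weighted average of the profiled memop sizes, mirroring the rounding
  // in getAverageMemOpLoopTripCount(): round(sum(Count * Value) / TotalCount).
  static uint64_t averageTripCount(
      const std::vector<std::pair<uint64_t, uint64_t>> &ValueCountPairs,
      uint64_t TotalCount) {
    uint64_t Weighted = 0;
    for (const auto &[Value, Count] : ValueCountPairs)
      Weighted += Count * Value;
    return static_cast<uint64_t>(std::round(1.0 * Weighted / TotalCount));
  }

  int main() {
    // !1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13} decodes as
    // kind 1 (IPVK_MemOPSize), total count 100, then (value, count) pairs
    // (5, 10) and (16, 13).
    uint64_t Avg = averageTripCount({{5, 10}, {16, 13}}, 100);
    // round((10*5 + 13*16) / 100) = round(2.58) = 3.
    std::printf("average trip count: %llu\n", (unsigned long long)Avg);
    return 0;
  }

With the byte-wide loops in these tests (MainLoopStep == 1), that average yields the !{!"branch_weights", i32 3, i32 1} latch weights the updated CHECK lines expect; the zero-trip guard gets the generic likely weights (1048575:1), constant-length calls use the constant itself as the trip count (hence the 1:1 latch weights in memset-pattern.ll), and profiled calls with no size profile data are annotated with the explicit "unknown" weights instead.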
