https://github.com/jinhuang1102 updated https://github.com/llvm/llvm-project/pull/184466
>From 8c2bf49e5f2d0cd2f18424c9b92a6d185524fc31 Mon Sep 17 00:00:00 2001 From: Jin Huang <[email protected]> Date: Wed, 4 Mar 2026 00:19:01 +0000 Subject: [PATCH] [profcheck][coro] Adding Branch weights PGO in CoroSplit and CoroFrame Passes --- .../coro-destructor-of-final_suspend.cpp | 2 +- .../llvm/Transforms/Coroutines/CoroShape.h | 13 +++ .../lib/Transforms/Coroutines/CoroCleanup.cpp | 4 + llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 20 +++++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 87 ++++++++++++++++++- .../coro-await-suspend-lower-invoke.ll | 30 +++++-- .../Transforms/Coroutines/coro-byval-param.ll | 30 +++++-- .../Coroutines/coro-catchswitch-cleanuppad.ll | 7 +- .../Transforms/Coroutines/coro-noop-pacbti.ll | 35 ++++++-- llvm/test/Transforms/Coroutines/coro-noop.ll | 37 ++++++-- 10 files changed, 230 insertions(+), 35 deletions(-) diff --git a/clang/test/CodeGenCoroutines/coro-destructor-of-final_suspend.cpp b/clang/test/CodeGenCoroutines/coro-destructor-of-final_suspend.cpp index bbc16a196d305..087085ba69c3b 100644 --- a/clang/test/CodeGenCoroutines/coro-destructor-of-final_suspend.cpp +++ b/clang/test/CodeGenCoroutines/coro-destructor-of-final_suspend.cpp @@ -61,7 +61,7 @@ gen maybe_throwing(bool x) { // CHECK: define{{.*}}@_Z14maybe_throwingb.destroy // CHECK: %[[INDEX:.+]] = load i1, ptr %index.addr, align 1 -// CHECK: br i1 %[[INDEX]], label %[[AFTERSUSPEND:.+]], label %[[CORO_FREE:.+]] +// CHECK: br i1 %[[INDEX]], label %[[AFTERSUSPEND:.+]], label %[[CORO_FREE:.+]], !prof // CHECK: [[AFTERSUSPEND]]: // CHECK: call{{.*}}_ZN3gen12promise_type13final_awaiterD1Ev( // CHECK: [[CORO_FREE]]: diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h index 28931e3260e68..138ac90470fe7 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h @@ -12,6 +12,7 @@ #ifndef LLVM_TRANSFORMS_COROUTINES_COROSHAPE_H #define 
LLVM_TRANSFORMS_COROUTINES_COROSHAPE_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Compiler.h" @@ -57,6 +58,9 @@ struct Shape { SmallVector<CoroSizeInst *, 2> CoroSizes; SmallVector<CoroAlignInst *, 2> CoroAligns; SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends; + // Map from suspend instructions to their execution frequency, used for branch + // weights in the resume function. + SmallDenseMap<AnyCoroSuspendInst *, uint64_t, 4> SuspendFreqs; SmallVector<CoroAwaitSuspendInst *, 4> CoroAwaitSuspends; SmallVector<CallInst *, 2> SymmetricTransfers; @@ -70,6 +74,7 @@ struct Shape { CoroSizes.clear(); CoroAligns.clear(); CoroSuspends.clear(); + SuspendFreqs.clear(); CoroAwaitSuspends.clear(); SymmetricTransfers.clear(); @@ -77,6 +82,7 @@ struct Shape { FramePtr = nullptr; AllocaSpillBlock = nullptr; + SwitchSuspendProfileWeights.clear(); } // Scan the function and collect the above intrinsics for later processing @@ -101,6 +107,13 @@ struct Shape { Value *FramePtr = nullptr; BasicBlock *AllocaSpillBlock = nullptr; + struct SwitchSuspendWeights { + uint32_t Resume = 0; + uint32_t Destroy = 0; + bool HasProfile = false; + }; + SmallVector<SwitchSuspendWeights, 4> SwitchSuspendProfileWeights; + struct SwitchLoweringStorage { SwitchInst *ResumeSwitch; AllocaInst *PromiseAlloca; diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index fc8a6277893a3..3534c40831fce 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -185,6 +185,10 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) { FnTy, GlobalValue::LinkageTypes::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), "__NoopCoro_ResumeDestroy", &M); + + // Because this function is a noop, we can set its entry count to 0.
+ NoopFn->setEntryCount(0); + buildDebugInfoForNoopResumeDestroyFunc(NoopFn); auto *Entry = BasicBlock::Create(C, "entry", NoopFn); ReturnInst::Create(C, Entry); diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 3e3960ea24e88..b2722c46fea64 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -25,8 +25,10 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/OptimizedStructLayout.h" @@ -43,6 +45,10 @@ using namespace llvm; +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + #define DEBUG_TYPE "coro-frame" namespace { @@ -1364,6 +1370,20 @@ static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB, SwitchOnDispatch->addCase(SwitchConstant, CaseBB); SwitchIndex++; } + + if (!ProfcheckDisableMetadataFixes) { + // Add branch weights to SwitchOnDispatch. The default destination is + // unreachable, so it gets weight 0. All cases get equal weights because they + // are mutually exclusive.
+ MDBuilder MDB(CleanupPadBB->getContext()); + SmallVector<uint32_t> Weights; + Weights.push_back(0); + for (int i = 0; i < SwitchIndex; ++i) { + Weights.push_back(llvm::MDBuilder::kUnlikelyBranchWeight); + } + SwitchOnDispatch->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + } } static void cleanupSinglePredPHIs(Function &F) { diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 7915fbf4cce05..0291667488ff4 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/ConstantFolding.h" @@ -55,7 +56,9 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -69,11 +72,13 @@ #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <initializer_list> #include <iterator> +#include <limits> using namespace llvm; @@ -411,7 +416,11 @@ void coro::BaseCloner::handleFinalSuspend() { auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); - Switch->removeCase(FinalCaseIt); + + // Use SwitchInstProfUpdateWrapper to remove the case, keeping the profile + // branch weights in sync with the switch successors. 
+ SwitchInstProfUpdateWrapper SwitchWrapper(*Switch); + SwitchWrapper.removeCase(FinalCaseIt); if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); @@ -426,7 +435,11 @@ void coro::BaseCloner::handleFinalSuspend() { auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), NewFramePtr); auto *Cond = Builder.CreateIsNull(Load); - Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); + auto *Br = Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); + applyProfMetadataIfEnabled(Br, [&](Instruction *Inst) { + setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, + Inst->getFunction()); + }); } OldSwitchBB->getTerminator()->eraseFromParent(); } @@ -623,8 +636,7 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, } /// Returns all debug records in F. -static SmallVector<DbgVariableRecord *> -collectDbgVariableRecords(Function &F) { +static SmallVector<DbgVariableRecord *> collectDbgVariableRecords(Function &F) { SmallVector<DbgVariableRecord *> DbgVariableRecords; for (auto &I : instructions(F)) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) @@ -1102,6 +1114,42 @@ void coro::SwitchCloner::create() { // Replacing coro.free with 'null' in cleanup to suppress deallocation code. if (FKind == coro::CloneKind::SwitchCleanup) elideCoroFree(NewFramePtr); + + // Update branch weights of the cloned resume switch if we have profile data. + if (auto *ClonedSwitch = cast_if_present<SwitchInst>( + VMap[Shape.SwitchLowering.ResumeSwitch])) { + bool IsDestroy = isSwitchDestroyFunction(); + bool HasAnyProfile = false; + for (auto &W : Shape.SwitchSuspendProfileWeights) { + if (W.HasProfile) { + HasAnyProfile = true; + break; + } + } + + if (HasAnyProfile) { + SmallVector<uint32_t, 4> ClonedWeights; + // Default destination (unreachable) weight. 
+ ClonedWeights.push_back(0); + + for (auto &Case : ClonedSwitch->cases()) { + auto *Val = Case.getCaseValue(); + uint64_t SuspendIndex = Val->getZExtValue(); + uint32_t Weight = 0; + if (SuspendIndex < Shape.SwitchSuspendProfileWeights.size()) { + auto &W = Shape.SwitchSuspendProfileWeights[SuspendIndex]; + if (W.HasProfile) { + Weight = IsDestroy ? W.Destroy : W.Resume; + } + } + ClonedWeights.push_back(Weight); + } + + MDBuilder MDB(NewF->getContext()); + ClonedSwitch->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(ClonedWeights)); + } + } } static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { @@ -1505,6 +1553,10 @@ struct SwitchCoroutineSplitter { // Split all coro.suspend calls size_t SuspendIndex = 0; + SmallVector<uint64_t, 8> SwitchWeights64; + // Default destination (unreachable) has weight 0 + SwitchWeights64.push_back(0); + for (auto *AnyS : Shape.CoroSuspends) { auto *S = cast<CoroSuspendInst>(AnyS); ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); @@ -1557,6 +1609,14 @@ struct SwitchCoroutineSplitter { S->getNextNode(), ResumeBB->getName() + Twine(".landing")); Switch->addCase(IndexVal, ResumeBB); + // Get pre-split frequency for this suspend point + uint64_t Weight = 1; // Default fallback weight + auto It = Shape.SuspendFreqs.find(AnyS); + if (It != Shape.SuspendFreqs.end()) { + Weight = It->second; + } + SwitchWeights64.push_back(Weight); + cast<UncondBrInst>(SuspendBB->getTerminator())->setSuccessor(LandingBB); auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, ""); PN->insertBefore(LandingBB->begin()); @@ -1590,6 +1650,13 @@ struct SwitchCoroutineSplitter { ++SuspendIndex; } + if (!Shape.SuspendFreqs.empty()) { + auto SwitchWeights32 = llvm::fitWeights(SwitchWeights64); + MDBuilder MDB(C); + Switch->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(SwitchWeights32)); + } + Builder.SetInsertPoint(UnreachBB); Builder.CreateUnreachable(); DBuilder.finalize(); @@ -1614,6 +1681,10 @@ struct 
SwitchCoroutineSplitter { // If there is a CoroAlloc and it returns false (meaning we elide the // allocation, use CleanupFn instead of DestroyFn). DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); + applyProfMetadataIfEnabled(DestroyOrCleanupFn, [&](Instruction *Inst) { + setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, + CoroId->getFunction()); + }); } // Destroy function pointer @@ -2248,6 +2319,14 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, F.setSplittedCoroutine(); + // Query BFI and populate SuspendFreqs before splitting + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + for (auto *AnyS : Shape.CoroSuspends) { + BasicBlock *BB = AnyS->getParent(); + uint64_t Freq = BFI.getBlockFreq(BB).getFrequency(); + Shape.SuspendFreqs[AnyS] = Freq; + } + std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape); SmallVector<Function *, 4> Clones; diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll index 20acaed0abe0d..eace4ecf11310 100644 --- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll +++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 6 ; Tests that invoke <type> @llvm.coro.await.suspend lowers to invoke @helper ; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),simplifycfg' -S | FileCheck %s %Awaiter = type {} -define void @f() presplitcoroutine personality i32 0 { +define void @f() presplitcoroutine personality i32 0 !prof !0 { entry: %awaiter = alloca %Awaiter %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) @@ -103,7 +103,13 @@ declare void 
@__cxa_end_catch() declare noalias ptr @malloc(i32) declare void @free(ptr) -; CHECK-LABEL: define void @f() personality i32 0 { + +!0 = !{!"function_entry_count", i64 1000} +;. +; CHECK: @f.resumers = private constant [3 x ptr] [ptr @f.resume, ptr @f.destroy, ptr @f.cleanup] +;. +; CHECK-LABEL: define void @f( +; CHECK-SAME: ) personality i32 0 !prof [[PROF0:![0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr @f, ptr @f.resumers) ; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i32 24) @@ -118,7 +124,7 @@ declare void @free(ptr) ; ; ; CHECK-LABEL: define internal void @f.resume( -; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 { +; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 !prof [[PROF0]] { ; CHECK-NEXT: [[ENTRY_RESUME:.*]]: ; CHECK-NEXT: [[AWAITER_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[HDL]], i64 0 ; CHECK-NEXT: [[INDEX_ADDR:%.*]] = getelementptr inbounds i8, ptr [[HDL]], i64 16 @@ -128,7 +134,7 @@ declare void @free(ptr) ; CHECK-NEXT: i2 1, label %[[AFTERCOROSUSPEND3:.*]] ; CHECK-NEXT: i2 -2, label %[[AFTERCOROSUSPEND7:.*]] ; CHECK-NEXT: i2 -1, label %[[CLEANUP:.*]] -; CHECK-NEXT: ] +; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]] ; CHECK: [[COROSAVE1]]: ; CHECK-NEXT: [[INDEX_ADDR13:%.*]] = getelementptr inbounds i8, ptr [[HDL]], i64 16 ; CHECK-NEXT: store i2 1, ptr [[INDEX_ADDR13]], align 1 @@ -176,7 +182,7 @@ declare void @free(ptr) ; ; ; CHECK-LABEL: define internal void @f.destroy( -; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 { +; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 !prof [[PROF0]] { ; CHECK-NEXT: [[ENTRY_DESTROY:.*:]] ; CHECK-NEXT: [[AWAITER_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[HDL]], i64 0 ; CHECK-NEXT: [[MEM:%.*]] = call ptr @llvm.coro.free(token poison, ptr [[HDL]]) @@ -185,9 
+191,19 @@ declare void @free(ptr) ; ; ; CHECK-LABEL: define internal void @f.cleanup( -; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 { +; CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(24) [[HDL:%.*]]) personality i32 0 !prof [[PROF0]] { ; CHECK-NEXT: [[ENTRY_CLEANUP:.*:]] ; CHECK-NEXT: [[AWAITER_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[HDL]], i64 0 ; CHECK-NEXT: call void @free(ptr null) ; CHECK-NEXT: ret void ; +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(argmem: read) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nomerge nounwind } +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 0, i32 -2147483648, i32 715827200, i32 119304419, i32 159072407} +;. diff --git a/llvm/test/Transforms/Coroutines/coro-byval-param.ll b/llvm/test/Transforms/Coroutines/coro-byval-param.ll index db1f151b59bc5..44204de99ea66 100644 --- a/llvm/test/Transforms/Coroutines/coro-byval-param.ll +++ b/llvm/test/Transforms/Coroutines/coro-byval-param.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s target datalayout = "e-m:e-p:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -10,21 +10,24 @@ target datalayout = "e-m:e-p:64:64-i64:64-f80:128-n8:16:32:64-S128" ; struct pointer, and that the alignment is taken into account. ; Function Attrs: noinline ssp uwtable mustprogress -define ptr @foo(ptr nocapture readonly byval(%struct.A) align 8 %a1) #0 { +;. 
+; CHECK: @foo.resumers = private constant [3 x ptr] [ptr @foo.resume, ptr @foo.destroy, ptr @foo.cleanup] +;. +define ptr @foo(ptr nocapture readonly byval(%struct.A) align 8 %a1) #0 !prof !0 { ; CHECK-LABEL: define ptr @foo( -; CHECK-SAME: ptr readonly byval([[STRUCT_A:%.*]]) align 8 [[A1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr readonly byval([[STRUCT_A:%.*]]) align 8 [[A1:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.coro.id(i32 16, ptr nonnull null, ptr @foo, ptr @foo.resumers) ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.coro.alloc(token [[TMP0]]) ; CHECK-NEXT: br i1 [[TMP1]], label %[[CORO_ALLOC:.*]], label %[[CORO_INIT:.*]] ; CHECK: [[CORO_ALLOC]]: -; CHECK-NEXT: [[CALL:%.*]] = call noalias nonnull ptr @_Znwm(i64 48) #[[ATTR11:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias nonnull ptr @_Znwm(i64 48) #[[ATTR10:[0-9]+]] ; CHECK-NEXT: br label %[[CORO_INIT]] ; CHECK: [[CORO_INIT]]: ; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[CALL]], %[[CORO_ALLOC]] ], [ null, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[TMP0]], ptr [[TMP2]]) ; CHECK-NEXT: store ptr @foo.resume, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], ptr @foo.destroy, ptr @foo.cleanup +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], ptr @foo.destroy, ptr @foo.cleanup, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[DESTROY_ADDR]], align 8 ; CHECK-NEXT: [[__PROMISE_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 @@ -152,3 +155,20 @@ attributes #8 = { nobuiltin nounwind "frame-pointer"="all" "no-trapping-math"="t attributes #9 = { allocsize(0) } attributes #10 = { noduplicate } +!0 = !{!"function_entry_count", i64 1000} +;. 
+; CHECK: attributes #[[ATTR0]] = { mustprogress noinline ssp uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR2]] = { nounwind } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nobuiltin nofree allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { mustprogress noinline nounwind ssp willreturn uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nomerge nounwind } +; CHECK: attributes #[[ATTR8:[0-9]+]] = { nobuiltin nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nounwind memory(argmem: read) } +; CHECK: attributes #[[ATTR10]] = { allocsize(0) } +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"unknown", !"coro-split"} +;. 
diff --git a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll index 99f9a3a5e77c8..945b364cbad70 100644 --- a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll +++ b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg<switch-range-to-icmp>,early-cse' -S | FileCheck %s declare i32 @__CxxFrameHandler3(...) -define ptr @f2(i1 %val) presplitcoroutine personality ptr @__CxxFrameHandler3 { +define ptr @f2(i1 %val) presplitcoroutine personality ptr @__CxxFrameHandler3 !prof !0 { entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr @f2, ptr null) %valueA = call i32 @f(); @@ -81,7 +81,7 @@ cleanup2: ; CHECK: %1 = phi i8 [ 0, %handler2 ], [ 1, %catch.dispatch.2 ] ; CHECK: %2 = cleanuppad within %h1 [] ; CHECK: %3 = icmp eq i8 %1, 0 -; CHECK: br i1 %3, label %cleanup2.from.handler2, label %cleanup2.from.catch.dispatch.2 +; CHECK: br i1 %3, label %cleanup2.from.handler2, label %cleanup2.from.catch.dispatch.2, !prof [[PROF1:![0-9]+]] ; CHECK: cleanup2.from.handler2: ; CHECK: %valueB.reload = load i32, ptr %valueB.spill.addr, align 4 @@ -113,3 +113,6 @@ declare void @print(i32) declare void @free(ptr) declare i32 @f() + +!0 = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1} diff --git a/llvm/test/Transforms/Coroutines/coro-noop-pacbti.ll b/llvm/test/Transforms/Coroutines/coro-noop-pacbti.ll index 6fb8312f97bdf..7728a77a7c246 100644 --- a/llvm/test/Transforms/Coroutines/coro-noop-pacbti.ll +++ b/llvm/test/Transforms/Coroutines/coro-noop-pacbti.ll @@ -1,14 +1,8 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 6 ; RUN: opt < %s -S -passes=coro-cleanup | FileCheck %s -; CHECK: define internal void @__NoopCoro_ResumeDestroy(ptr %0) #1 { -; CHECK-NEXT: entry: -; 
CHECK-NEXT: ret void -; CHECK-NEXT: } - -; CHECK: attributes #1 = { "branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="a_key" } - -define ptr @noop() { +define ptr @noop() !prof !3 { entry: %n = call ptr @llvm.coro.noop() ret ptr %n @@ -21,3 +15,28 @@ declare ptr @llvm.coro.noop() !0 = !{i32 8, !"branch-target-enforcement", i32 1} !1 = !{i32 8, !"sign-return-address", i32 1} !2 = !{i32 8, !"sign-return-address-all", i32 1} +!3 = !{!"function_entry_count", i64 1000} +;. +; CHECK: @NoopCoro.Frame.Const = private constant %NoopCoro.Frame { ptr @__NoopCoro_ResumeDestroy, ptr @__NoopCoro_ResumeDestroy }, no_sanitize_address, no_sanitize_hwaddress +;. +; CHECK-LABEL: define ptr @noop( +; CHECK-SAME: ) !prof [[PROF3:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret ptr @NoopCoro.Frame.Const +; +; +; CHECK-LABEL: define internal void @__NoopCoro_ResumeDestroy( +; CHECK-SAME: ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !prof [[PROF4:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } +; CHECK: attributes #[[ATTR1]] = { "branch-target-enforcement" "sign-return-address"="all" "sign-return-address-key"="a_key" } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 8, !"branch-target-enforcement", i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 8, !"sign-return-address", i32 1} +; CHECK: [[META2:![0-9]+]] = !{i32 8, !"sign-return-address-all", i32 1} +; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF4]] = !{!"function_entry_count", i64 0} +;. 
diff --git a/llvm/test/Transforms/Coroutines/coro-noop.ll b/llvm/test/Transforms/Coroutines/coro-noop.ll index 3736d3f1762f5..47186bc899124 100644 --- a/llvm/test/Transforms/Coroutines/coro-noop.ll +++ b/llvm/test/Transforms/Coroutines/coro-noop.ll @@ -1,14 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 6 ; Tests that CoroCleanup pass correctly lowers coro.noop ; RUN: opt < %s -S -passes=coro-cleanup | FileCheck %s -; CHECK: @NoopCoro.Frame.Const = private constant %NoopCoro.Frame { ptr @__NoopCoro_ResumeDestroy, ptr @__NoopCoro_ResumeDestroy } -; CHECK-LABEL: @noop( define ptr @noop() { -; CHECK-NEXT: entry entry: -; CHECK-NEXT: ret ptr @NoopCoro.Frame.Const %n = call ptr @llvm.coro.noop() ret ptr %n } @@ -25,8 +22,32 @@ declare ptr @llvm.coro.noop() !4 = !{i32 2, !"Debug Info Version", i32 3} -; CHECK: define internal void @__NoopCoro_ResumeDestroy(ptr %0) !dbg ![[RESUME:[0-9]+]] { -; CHECK-NEXT: entry -; CHECK-NEXT: ret void -; CHECK: ![[RESUME]] = distinct !DISubprogram(name: "__NoopCoro_ResumeDestroy", linkageName: "__NoopCoro_ResumeDestroy", {{.*}} flags: DIFlagArtificial, +;. +; CHECK: @NoopCoro.Frame.Const = private constant %NoopCoro.Frame { ptr @__NoopCoro_ResumeDestroy, ptr @__NoopCoro_ResumeDestroy }, no_sanitize_address, no_sanitize_hwaddress +;. +; CHECK-LABEL: define ptr @noop( +; CHECK-SAME: ) !prof [[PROF5:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret ptr @NoopCoro.Frame.Const +; +; +; CHECK-LABEL: define internal void @__NoopCoro_ResumeDestroy( +; CHECK-SAME: ptr [[TMP0:%.*]]) !dbg [[DBG6:![0-9]+]] !prof [[PROF9:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "hand-written", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]]) +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}}) +; CHECK: [[META2]] = !{} +; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Dwarf Version", i32 4} +; CHECK: [[META4:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK: [[PROF5]] = !{!"function_entry_count", i64 1000} +; CHECK: [[DBG6]] = distinct !DISubprogram(name: "__NoopCoro_ResumeDestroy", linkageName: "__NoopCoro_ResumeDestroy", scope: null, file: [[META1]], type: [[META7:![0-9]+]], flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: [[META0]]) +; CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]]) +; CHECK: [[META8]] = !{null, null} +; CHECK: [[PROF9]] = !{!"function_entry_count", i64 0} +;. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
