https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/135656
None >From e7b5f81cf9bd3237b0b1ee2f51f105b99f1061b7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Mon, 14 Apr 2025 07:19:58 -0700 Subject: [PATCH] Reentry --- .../lib/ctx_profile/CtxInstrProfiling.cpp | 151 ++++++++++++------ .../tests/CtxInstrProfilingTest.cpp | 115 ++++++++++++- .../llvm/ProfileData/CtxInstrContextNode.h | 6 +- .../Instrumentation/PGOCtxProfLowering.cpp | 82 ++++++---- .../PGOProfile/ctx-instrumentation.ll | 4 +- 5 files changed, 269 insertions(+), 89 deletions(-) diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp index 2d173f0fcb19a..2e26541c1acea 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp +++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp @@ -41,7 +41,44 @@ Arena *FlatCtxArena = nullptr; // Set to true when we enter a root, and false when we exit - regardless if this // thread collects a contextual profile for that root. -__thread bool IsUnderContext = false; +__thread int UnderContextRefCount = 0; +__thread void *volatile EnteredContextAddress = 0; + +void onFunctionEntered(void *Address) { + UnderContextRefCount += (Address == EnteredContextAddress); + assert(UnderContextRefCount > 0); +} + +void onFunctionExited(void *Address) { + UnderContextRefCount -= (Address == EnteredContextAddress); + assert(UnderContextRefCount >= 0); +} + +// Returns true if it was entered the first time +bool rootEnterIsFirst(void* Address) { + bool Ret = true; + if (!EnteredContextAddress) { + EnteredContextAddress = Address; + assert(UnderContextRefCount == 0); + Ret = true; + } + onFunctionEntered(Address); + return Ret; +} + +// Return true if this also exits the root. +bool exitsRoot(void* Address) { + onFunctionExited(Address); + if (UnderContextRefCount == 0) { + EnteredContextAddress = nullptr; + return true; + } + return false; + +} + +bool hasEnteredARoot() { return UnderContextRefCount > 0; } + __sanitizer::atomic_uint8_t ProfilingStarted = {}; __sanitizer::atomic_uintptr_t RootDetector = {}; @@ -287,62 +324,65 @@ ContextRoot *FunctionData::getOrAllocateContextRoot() { return Root; } -ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid, - uint32_t Counters, uint32_t Callsites) - SANITIZER_NO_THREAD_SAFETY_ANALYSIS { - IsUnderContext = true; - __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1, - __sanitizer::memory_order_relaxed); +ContextNode *tryStartContextGivenRoot( + ContextRoot *Root, void *EntryAddress, GUID Guid, uint32_t Counters, + uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + + if (rootEnterIsFirst(EntryAddress)) + __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1, + __sanitizer::memory_order_relaxed); if (!Root->FirstMemBlock) { setupContext(Root, Guid, Counters, Callsites); } if (Root->Taken.TryLock()) { + assert(__llvm_ctx_profile_current_context_root == nullptr); __llvm_ctx_profile_current_context_root = Root; onContextEnter(*Root->FirstNode); return Root->FirstNode; } // If this thread couldn't take the lock, return scratch context. - __llvm_ctx_profile_current_context_root = nullptr; return TheScratchContext; } +ContextNode *getOrStartContextOutsideCollection(FunctionData &Data, + ContextRoot *OwnCtxRoot, + void *Callee, GUID Guid, + uint32_t NumCounters, + uint32_t NumCallsites) { + // This must only be called when __llvm_ctx_profile_current_context_root is + // null. + assert(__llvm_ctx_profile_current_context_root == nullptr); + // OwnCtxRoot is Data.CtxRoot. Since it's volatile, and is used by the caller, + // pre-load it. + assert(Data.CtxRoot == OwnCtxRoot); + // If we have a root detector, try sampling. + // Otherwise - regardless if we started profiling or not, if Data.CtxRoot is + // allocated, try starting a context tree - basically, as-if + // __llvm_ctx_profile_start_context were called. + if (auto *RAD = getRootDetector()) + RAD->sample(); + else if (reinterpret_cast<uintptr_t>(OwnCtxRoot) > 1) + return tryStartContextGivenRoot(OwnCtxRoot, Data.EntryAddress, Guid, + NumCounters, NumCallsites); + + // If we didn't start profiling, or if we are under a context, just not + // collecting, return the scratch buffer. + if (hasEnteredARoot() || + !__sanitizer::atomic_load_relaxed(&ProfilingStarted)) + return TheScratchContext; + return markAsScratch( + onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters))); +} + ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid, uint32_t NumCounters, uint32_t NumCallsites, - ContextRoot *CtxRoot) { - - // 1) if we are currently collecting a contextual profile, fetch a ContextNode - // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted` - // to (hopefully) offset the penalty of creating these contexts to before - // profiling. - // - // 2) if we are under a root (regardless if this thread is collecting or not a - // contextual profile for that root), do not collect a flat profile. We want - // to keep flat profiles only for activations that can't happen under a root, - // to avoid confusing profiles. We can, for example, combine flattened and - // flat profiles meaningfully, as we wouldn't double-count anything. - // - // 3) to avoid lengthy startup, don't bother with flat profiles until the - // profiling has started. We would reset them anyway when profiling starts. - // HOWEVER. This does lose profiling for message pumps: those functions are - // entered once and never exit. They should be assumed to be entered before - // profiling starts - because profiling should start after the server is up - // and running (which is equivalent to "message pumps are set up"). - if (!CtxRoot) { - if (auto *RAD = getRootDetector()) - RAD->sample(); - else if (auto *CR = Data.CtxRoot) { - if (canBeRoot(CR)) - return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites); - } - if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted)) - return TheScratchContext; - else - return markAsScratch( - onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters))); - } - auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr}); + ContextRoot &CtxRoot) { + // This nust only be called when + // __llvm_ctx_profile_current_context_root is not null + assert(__llvm_ctx_profile_current_context_root != nullptr); + auto [Iter, Ins] = CtxRoot.Unhandled.insert({Guid, nullptr}); if (Ins) - Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode, + Iter->second = getCallsiteSlow(Guid, &CtxRoot.FirstUnhandledCalleeNode, NumCounters, 0); return markAsScratch(onContextEnter(*Iter->second)); } @@ -351,10 +391,13 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, GUID Guid, uint32_t NumCounters, uint32_t NumCallsites) { auto *CtxRoot = __llvm_ctx_profile_current_context_root; - // fast "out" if we're not even doing contextual collection. + auto *OwnCtxRoot = Data->CtxRoot; if (!CtxRoot) - return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites, - nullptr); + return getOrStartContextOutsideCollection(*Data, OwnCtxRoot, Callee, Guid, + NumCounters, NumCallsites); + onFunctionEntered(Callee); + assert(canBeRoot(CtxRoot)); + // should we re-enter the root we're currently collecting, // also fast "out" if the caller is scratch. We can see if it's scratch by // looking at the interior pointer into the subcontexts vector that the caller @@ -364,7 +407,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]); if (!CallsiteContext || isScratch(CallsiteContext)) return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites, - CtxRoot); + *CtxRoot); // if the callee isn't the expected one, return scratch. // Signal handler(s) could have been invoked at any point in the execution. @@ -383,7 +426,7 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]); if (ExpectedCallee != Callee) return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites, - CtxRoot); + *CtxRoot); auto *Callsite = *CallsiteContext; // in the case of indirect calls, we will have all seen targets forming a @@ -410,16 +453,20 @@ ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid, uint32_t Callsites) { auto *Root = FData->getOrAllocateContextRoot(); assert(canBeRoot(Root)); - return tryStartContextGivenRoot(Root, Guid, Counters, Callsites); + auto *EntryAddress = FData->EntryAddress; + return tryStartContextGivenRoot(Root, EntryAddress, Guid, Counters, + Callsites); } void __llvm_ctx_profile_release_context(FunctionData *FData) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + if (!exitsRoot(FData->EntryAddress)) + return; const auto *CurrentRoot = __llvm_ctx_profile_current_context_root; auto *CR = FData->CtxRoot; if (!CurrentRoot || CR != CurrentRoot) return; - IsUnderContext = false; + assert(CR && canBeRoot(CR)); __llvm_ctx_profile_current_context_root = nullptr; CR->Taken.Unlock(); @@ -500,6 +547,10 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) { void __llvm_ctx_profile_free() { __sanitizer::atomic_store_relaxed(&ProfilingStarted, false); { + __sanitizer::atomic_store_relaxed(&ProfilingStarted, false); + if (auto *RD = getRootDetector()) { + RD->join(); + } __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock( &AllContextsMutex); for (int I = 0, E = AllContextRoots.Size(); I < E; ++I) @@ -522,5 +573,7 @@ void __llvm_ctx_profile_free() { } FlatCtxArenaHead = nullptr; + UnderContextRefCount = 0; + EnteredContextAddress = nullptr; } } diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp index 80a9a96f2a16b..39a225ac1dd93 100644 --- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp +++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp @@ -3,13 +3,27 @@ #include <thread> using namespace __ctx_profile; +using namespace __asan; class ContextTest : public ::testing::Test { - void SetUp() override { Root.getOrAllocateContextRoot(); } + int SomethingWithAddress = 0; + void SetUp() override { + Root.EntryAddress = &SomethingWithAddress; + Root.getOrAllocateContextRoot(); + } void TearDown() override { __llvm_ctx_profile_free(); } public: FunctionData Root; + void initializeFData(std::vector<FunctionData> &FData, + const std::vector<int> &FuncAddresses, bool AsRoots) { + ASSERT_EQ(FData.size(), FuncAddresses.size()); + for (size_t I = 0, E = FData.size(); I < E; ++I) { + FData[I].EntryAddress = &FuncAddresses[I]; + if (AsRoots) + FData[I].getOrAllocateContextRoot(); + } + } }; TEST(ArenaTest, ZeroInit) { @@ -85,7 +99,11 @@ TEST_F(ContextTest, Callsite) { EXPECT_EQ(Subctx->size(), sizeof(ContextNode) + 3 * sizeof(uint64_t) + 1 * sizeof(ContextNode *)); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&FData); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); } TEST_F(ContextTest, ScratchNoCollectionProfilingNotStarted) { @@ -122,11 +140,41 @@ TEST_F(ContextTest, ScratchNoCollectionProfilingStarted) { EXPECT_NE(FData.FlatCtx, nullptr); EXPECT_EQ(reinterpret_cast<uintptr_t>(FData.FlatCtx) + 1, reinterpret_cast<uintptr_t>(Ctx)); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); + __llvm_ctx_profile_release_context(&FData); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); +} + +TEST_F(ContextTest, RootCallingRootDoesNotChangeCurrentContext) { + ASSERT_EQ(__llvm_ctx_profile_current_context_root, nullptr); + int FakeCalleeAddress[2]{0, 0}; + FunctionData FData[2]; + FData[0].EntryAddress = &FakeCalleeAddress[0]; + FData[1].EntryAddress = &FakeCalleeAddress[1]; + FData[0].getOrAllocateContextRoot(); + FData[1].getOrAllocateContextRoot(); + __llvm_ctx_profile_start_collection(); + auto *Ctx1 = __llvm_ctx_profile_get_context(&FData[0], &FakeCalleeAddress[0], + 1234U, 1U, 1U); + EXPECT_EQ(Ctx1, FData[0].CtxRoot->FirstNode); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot); + + __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress[0]; + __llvm_ctx_profile_callsite[0] = &Ctx1->subContexts()[0]; + auto *Ctx2 = + __llvm_ctx_profile_get_context(&FData[1], &FakeCalleeAddress[1], 2, 1, 0); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot); + __llvm_ctx_profile_release_context(&FData[1]); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, FData[0].CtxRoot); + __llvm_ctx_profile_release_context(&FData[0]); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); } TEST_F(ContextTest, ScratchDuringCollection) { __llvm_ctx_profile_start_collection(); auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + int FakeCalleeAddress = 0; int OtherFakeCalleeAddress = 0; __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress; @@ -164,7 +212,71 @@ TEST_F(ContextTest, ScratchDuringCollection) { EXPECT_TRUE(isScratch(Subctx3)); EXPECT_EQ(FData[2].FlatCtx, nullptr); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&FData[2]); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&FData[1]); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&FData[0]); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); +} + +TEST_F(ContextTest, RecursiveRootExplicitlyRegistered) { + __llvm_ctx_profile_start_collection(); + auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + + auto *Subctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4); + EXPECT_TRUE(isScratch(Subctx)); + + EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Root.CtxRoot->TotalEntries), 1U); + + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); +} + +TEST_F(ContextTest, RecursiveRootAutoDiscovered) { + __llvm_ctx_profile_start_collection(); + auto *Ctx = + __llvm_ctx_profile_get_context(&Root, Root.EntryAddress, 1, 10, 4); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + + auto *Subctx = + __llvm_ctx_profile_get_context(&Root, Root.EntryAddress, 1, 10, 4); + EXPECT_TRUE(isScratch(Subctx)); + + EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Root.CtxRoot->TotalEntries), 1U); + + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); +} + +TEST_F(ContextTest, RootEntersOtherRoot) { + __llvm_ctx_profile_start_collection(); + FData Roots[2]; + std::vector<int> Addresses(2); + initializeFData(Roots, Addresses, true); + auto *Ctx = __llvm_ctx_profile_start_context(&Roots[0], 1, 10, 4); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Roots[0].CtxRoot); + + auto *Subctx = __llvm_ctx_profile_start_context(&Roots[1], 1, 10, 4); + EXPECT_FALSE(isScratch(Subctx)); + + EXPECT_EQ(__sanitizer::atomic_load_relaxed(&Root.CtxRoot->TotalEntries), 1U); + + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, Root.CtxRoot); + __llvm_ctx_profile_release_context(&Root); + EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr); } TEST_F(ContextTest, NeedMoreMemory) { @@ -185,6 +297,7 @@ TEST_F(ContextTest, NeedMoreMemory) { EXPECT_EQ(Ctx->subContexts()[2], Subctx); EXPECT_NE(CurrentMem, CtxRoot.CurrentMem); EXPECT_NE(CtxRoot.CurrentMem, nullptr); + __llvm_ctx_profile_release_context(&Root); } TEST_F(ContextTest, ConcurrentRootCollection) { diff --git a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h index e4e310b2e987d..a33549fb327ba 100644 --- a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h +++ b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h @@ -125,10 +125,10 @@ class ContextNode final { /// VOLATILE_PTRDECL is the same as above, but for volatile pointers; /// /// MUTEXDECL takes one parameter, the name of a field that is a mutex. -#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, VOLATILE_PTRDECL, \ - MUTEXDECL) \ +#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, ENTRY_ADDRESS, \ + VOLATILE_PTRDECL, MUTEXDECL) \ PTRDECL(FunctionData, Next) \ - VOLATILE_PTRDECL(void, EntryAddress) \ + ENTRY_ADDRESS \ CONTEXT_PTR \ VOLATILE_PTRDECL(ContextNode, FlatCtx) \ MUTEXDECL(Mutex) diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp index 136225ab27cdc..2195030335d43 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -66,7 +66,12 @@ class CtxInstrumentationLowerer final { Function *ReleaseCtx = nullptr; GlobalVariable *ExpectedCalleeTLS = nullptr; GlobalVariable *CallsiteInfoTLS = nullptr; - Constant *CannotBeRootInitializer = nullptr; + Type *PointerTy = nullptr; + Type *SanitizerMutexType = nullptr; + Type *I32Ty = nullptr; + Type *I64Ty = nullptr; + + Constant *getFunctionDataInitializer(Function &F, bool HasMusttail); public: CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM); @@ -114,42 +119,55 @@ void emitUnsupportedRoot(const Function &F, StringRef Reason) { } } // namespace +Constant * +CtxInstrumentationLowerer::getFunctionDataInitializer(Function &F, + bool HasMusttail) { +#define _PTRDECL(_, __) Constant::getNullValue(PointerTy), +#define _VOLATILE_PTRDECL(_, __) _PTRDECL(_, __) +#define _MUTEXDECL(_) Constant::getNullValue(SanitizerMutexType), +#define _ENTRY_ADDRESS \ + (ContextRootSet.contains(&F) ? &F : Constant::getNullValue(PointerTy)), +#define _CONTEXT_ROOT \ + (HasMusttail \ + ? Constant::getIntegerValue( \ + PointerTy, \ + APInt(M.getDataLayout().getPointerTypeSizeInBits(PointerTy), 1U)) \ + : Constant::getNullValue(PointerTy)), + return ConstantStruct::get( + FunctionDataTy, + {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT, _ENTRY_ADDRESS, + _VOLATILE_PTRDECL, _MUTEXDECL)}); +#undef _PTRDECL +#undef _CONTEXT_ROOT +#undef _ENTRY_ADDRESS +#undef _VOLATILE_PTRDECL +#undef _MUTEXDECL +} + // set up tie-in with compiler-rt. // NOTE!!! // These have to match compiler-rt/lib/ctx_profile/CtxInstrProfiling.h CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) { - auto *PointerTy = PointerType::get(M.getContext(), 0); - auto *SanitizerMutexType = Type::getInt8Ty(M.getContext()); - auto *I32Ty = Type::getInt32Ty(M.getContext()); - auto *I64Ty = Type::getInt64Ty(M.getContext()); + PointerTy = PointerType::get(M.getContext(), 0); + SanitizerMutexType = Type::getInt8Ty(M.getContext()); + I32Ty = Type::getInt32Ty(M.getContext()); + I64Ty = Type::getInt64Ty(M.getContext()); #define _PTRDECL(_, __) PointerTy, #define _VOLATILE_PTRDECL(_, __) PointerTy, #define _CONTEXT_ROOT PointerTy, +#define _ENTRY_ADDRESS PointerTy, #define _MUTEXDECL(_) SanitizerMutexType, FunctionDataTy = StructType::get( - M.getContext(), {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT, - _VOLATILE_PTRDECL, _MUTEXDECL)}); -#undef _PTRDECL -#undef _CONTEXT_ROOT -#undef _VOLATILE_PTRDECL -#undef _MUTEXDECL - -#define _PTRDECL(_, __) Constant::getNullValue(PointerTy), -#define _VOLATILE_PTRDECL(_, __) _PTRDECL(_, __) -#define _MUTEXDECL(_) Constant::getNullValue(SanitizerMutexType), -#define _CONTEXT_ROOT \ - Constant::getIntegerValue( \ - PointerTy, \ - APInt(M.getDataLayout().getPointerTypeSizeInBits(PointerTy), 1U)), - CannotBeRootInitializer = ConstantStruct::get( - FunctionDataTy, {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT, - _VOLATILE_PTRDECL, _MUTEXDECL)}); + M.getContext(), + {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT, _ENTRY_ADDRESS, + _VOLATILE_PTRDECL, _MUTEXDECL)}); #undef _PTRDECL #undef _CONTEXT_ROOT +#undef _ENYTR_ADDRESS #undef _VOLATILE_PTRDECL #undef _MUTEXDECL @@ -286,9 +304,8 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { // NumCallsites and NumCounters. We delcare it here because it's more // convenient - we have the Builder. ThisContextType = StructType::get( - F.getContext(), - {ContextNodeTy, ArrayType::get(Builder.getInt64Ty(), NumCounters), - ArrayType::get(Builder.getPtrTy(), NumCallsites)}); + F.getContext(), {ContextNodeTy, ArrayType::get(I64Ty, NumCounters), + ArrayType::get(PointerTy, NumCallsites)}); // Figure out which way we obtain the context object for this function - // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the // former case, we also set TheRootFuctionData since we need to release it @@ -302,8 +319,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { // treated as a "can't be set as root". TheRootFuctionData = new GlobalVariable( M, FunctionDataTy, false, GlobalVariable::InternalLinkage, - HasMusttail ? CannotBeRootInitializer - : Constant::getNullValue(FunctionDataTy)); + getFunctionDataInitializer(F, HasMusttail)); if (ContextRootSet.contains(&F)) { Context = Builder.CreateCall( @@ -320,7 +336,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { }); } // The context could be scratch. - auto *CtxAsInt = Builder.CreatePtrToInt(Context, Builder.getInt64Ty()); + auto *CtxAsInt = Builder.CreatePtrToInt(Context, I64Ty); if (NumCallsites > 0) { // Figure out which index of the TLS 2-element buffers to use. // Scratch context => we use index == 1. Real contexts => index == 0. @@ -330,8 +346,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { PointerType::getUnqual(F.getContext()), Builder.CreateThreadLocalAddress(ExpectedCalleeTLS), {Index}); CallsiteInfoTLSAddr = Builder.CreateGEP( - Builder.getInt32Ty(), - Builder.CreateThreadLocalAddress(CallsiteInfoTLS), {Index}); + I32Ty, Builder.CreateThreadLocalAddress(CallsiteInfoTLS), {Index}); } // Because the context pointer may have LSB set (to indicate scratch), // clear it for the value we use as base address for the counter vector. @@ -367,10 +382,9 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { auto *GEP = Builder.CreateGEP( ThisContextType, RealContext, {Builder.getInt32(0), Builder.getInt32(1), AsStep->getIndex()}); - Builder.CreateStore( - Builder.CreateAdd(Builder.CreateLoad(Builder.getInt64Ty(), GEP), - AsStep->getStep()), - GEP); + Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(I64Ty, GEP), + AsStep->getStep()), + GEP); } break; case llvm::Intrinsic::instrprof_callsite: // callsite lowering: write the called value in the expected callee diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll index 6afa37ef286f5..c2a22c89c43b7 100644 --- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll +++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll @@ -12,8 +12,8 @@ declare void @bar() ; LOWERING: @__llvm_ctx_profile_callsite = external hidden thread_local global ptr ; LOWERING: @__llvm_ctx_profile_expected_callee = external hidden thread_local global ptr ; LOWERING: @[[GLOB0:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer -; LOWERING: @[[GLOB1:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer -; LOWERING: @[[GLOB2:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer +; LOWERING: @[[GLOB1:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } { ptr null, ptr @an_entrypoint, ptr null, ptr null, i8 0 } +; LOWERING: @[[GLOB2:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } { ptr null, ptr @another_entrypoint_no_callees, ptr null, ptr null, i8 0 } ; LOWERING: @[[GLOB3:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer ; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer ; LOWERING: @[[GLOB5:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits