https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/131417
>From ac74a3bbe55cdc1e618af7256519398acd8eaf80 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtro...@google.com> Date: Fri, 14 Mar 2025 15:59:22 -0700 Subject: [PATCH] [ctxprof] Track unhandled call targets --- .../lib/ctx_profile/CtxInstrContextNode.h | 1 + .../lib/ctx_profile/CtxInstrProfiling.cpp | 36 +++++++--- .../lib/ctx_profile/CtxInstrProfiling.h | 15 +++++ .../tests/CtxInstrProfilingTest.cpp | 2 +- .../TestCases/generate-context.cpp | 15 ++++- .../llvm/ProfileData/CtxInstrContextNode.h | 1 + .../llvm/ProfileData/PGOCtxProfReader.h | 26 +++++--- .../llvm/ProfileData/PGOCtxProfWriter.h | 6 +- llvm/lib/ProfileData/PGOCtxProfReader.cpp | 65 ++++++++++++++----- llvm/lib/ProfileData/PGOCtxProfWriter.cpp | 26 ++++++-- .../Instrumentation/PGOInstrumentation.cpp | 13 ++++ .../Inputs/valid-with-unhandled.yaml | 26 ++++++++ .../llvm-ctxprof-util/llvm-ctxprof-util.test | 21 ++++-- .../PGOCtxProfReaderWriterTest.cpp | 14 ++-- 14 files changed, 211 insertions(+), 56 deletions(-) create mode 100644 llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h index 55962df57fb58..a176662b5cb3d 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h +++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h @@ -121,6 +121,7 @@ class ProfileWriter { public: virtual void startContextSection() = 0; virtual void writeContextual(const ctx_profile::ContextNode &RootNode, + const ctx_profile::ContextNode *Unhandled, uint64_t TotalRootEntryCount) = 0; virtual void endContextSection() = 0; diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp index 6ef7076d93e31..c97229dd7f043 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp +++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp @@ -246,22 +246,37 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid, ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid, uint32_t NumCounters) { - // 1) if we are under a root (regardless if this thread is collecting or not a + + // 1) if we are currently collecting a contextual profile, fetch a ContextNode + // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted` + // to (hopefully) offset the penalty of creating these contexts to before + // profiling. + // + // 2) if we are under a root (regardless if this thread is collecting or not a // contextual profile for that root), do not collect a flat profile. We want // to keep flat profiles only for activations that can't happen under a root, // to avoid confusing profiles. We can, for example, combine flattened and // flat profiles meaningfully, as we wouldn't double-count anything. // - // 2) to avoid lengthy startup, don't bother with flat profiles until the - // profiling started. We would reset them anyway when profiling starts. + // 3) to avoid lengthy startup, don't bother with flat profiles until the + // profiling has started. We would reset them anyway when profiling starts. // HOWEVER. This does lose profiling for message pumps: those functions are // entered once and never exit. They should be assumed to be entered before // profiling starts - because profiling should start after the server is up // and running (which is equivalent to "message pumps are set up"). - if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted)) - return TheScratchContext; - return markAsScratch( - onContextEnter(*getFlatProfile(Data, Guid, NumCounters))); + ContextRoot *R = __llvm_ctx_profile_current_context_root; + if (!R) { + if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted)) + return TheScratchContext; + else + return markAsScratch( + onContextEnter(*getFlatProfile(Data, Guid, NumCounters))); + } + auto [Iter, Ins] = R->Unhandled.insert({Guid, nullptr}); + if (Ins) + Iter->second = + getCallsiteSlow(Guid, &R->FirstUnhandledCalleeNode, NumCounters, 0); + return markAsScratch(onContextEnter(*Iter->second)); } ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee, @@ -396,6 +411,8 @@ void __llvm_ctx_profile_start_collection() { ++NumMemUnits; resetContextNode(*Root->FirstNode); + if (Root->FirstUnhandledCalleeNode) + resetContextNode(*Root->FirstUnhandledCalleeNode); __sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0); } __sanitizer::atomic_store_relaxed(&ProfilingStarted, true); @@ -416,8 +433,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) { __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid"); return false; } - Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed( - &Root->TotalEntries)); + Writer.writeContextual( + *Root->FirstNode, Root->FirstUnhandledCalleeNode, + __sanitizer::atomic_load_relaxed(&Root->TotalEntries)); } Writer.endContextSection(); Writer.startFlatSection(); diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h index cdbfa571247b7..1e151804ea5b1 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h +++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h @@ -10,6 +10,7 @@ #define CTX_PROFILE_CTXINSTRPROFILING_H_ #include "CtxInstrContextNode.h" +#include "sanitizer_common/sanitizer_dense_map.h" #include "sanitizer_common/sanitizer_mutex.h" #include <sanitizer/common_interface_defs.h> @@ -83,6 +84,20 @@ struct ContextRoot { // Count the number of entries - regardless if we could take the `Taken` mutex ::__sanitizer::atomic_uint64_t TotalEntries = {}; + // Profiles for functions we encounter when collecting a contexutal profile, + // that are not associated with a callsite. This is expected to happen for + // signal handlers, but it also - problematically - currently happens for + // call sites generated after profile instrumentation, primarily + // mem{memset|copy|move|set}. + // `Unhandled` serves 2 purposes: + // 1. identifying such cases (like the memops) + // 2. collecting a profile for them, which can be at least used as a flat + // profile + ::__sanitizer::DenseMap<GUID, ContextNode *> Unhandled; + // Keep the unhandled contexts in a list, as we allocate them, as it makes it + // simpler to send to the writer when the profile is fetched. + ContextNode *FirstUnhandledCalleeNode = nullptr; + // Taken is used to ensure only one thread traverses the contextual graph - // either to read it or to write it. On server side, the same entrypoint will // be entered by numerous threads, but over time, the profile aggregated by diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp index ccb8f0e87fcdd..83756fed0d6e6 100644 --- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp +++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp @@ -240,7 +240,7 @@ TEST_F(ContextTest, Dump) { TestProfileWriter(ContextRoot *Root, size_t Entries) : Root(Root), Entries(Entries) {} - void writeContextual(const ContextNode &Node, + void writeContextual(const ContextNode &Node, const ContextNode *Unhandled, uint64_t TotalRootEntryCount) override { EXPECT_EQ(TotalRootEntryCount, Entries); EXPECT_EQ(EnteredSectionCount, 1); diff --git a/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp b/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp index 319f17debe48f..3dc53637a35d8 100644 --- a/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp +++ b/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp @@ -5,7 +5,8 @@ // RUN: cp %llvm_src/include/llvm/ProfileData/CtxInstrContextNode.h %t_include/ // // Compile with ctx instrumentation "on". We treat "theRoot" as callgraph root. -// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot +// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot \ +// RUN: -mllvm -ctx-prof-skip-callsite-instr=skip_me // // Run the binary, and observe the profile fetch handler's output. // RUN: %t.bin | FileCheck %s @@ -20,11 +21,14 @@ extern "C" bool __llvm_ctx_profile_fetch(ProfileWriter &); // avoid name mangling extern "C" { +__attribute__((noinline)) void skip_me() {} + __attribute__((noinline)) void someFunction(int I) { if (I % 2) printf("check odd\n"); else printf("check even\n"); + skip_me(); } // block inlining because the pre-inliner otherwise will inline this - it's @@ -36,6 +40,7 @@ __attribute__((noinline)) void theRoot() { for (auto I = 0; I < 2; ++I) { someFunction(I); } + skip_me(); } __attribute__((noinline)) void flatFct() { @@ -85,9 +90,13 @@ class TestProfileWriter : public ProfileWriter { } void writeContextual(const ContextNode &RootNode, + const ContextNode *Unhandled, uint64_t EntryCount) override { std::cout << "Entering Root " << RootNode.guid() << " with total entry count " << EntryCount << std::endl; + for (const auto *P = Unhandled; P; P = P->next()) + std::cout << "Unhandled GUID: " << P->guid() << " entered " + << P->entrycount() << " times" << std::endl; printProfile(RootNode, "", ""); } @@ -119,6 +128,8 @@ class TestProfileWriter : public ProfileWriter { // branches would be taken once, so the second counter is 1. // CHECK-NEXT: Entered Context Section // CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1 +// skip_me is entered 4 times: 3 via `someFunction`, and once from `theRoot` +// CHECK-NEXT: Unhandled GUID: 17928815489886282963 entered 4 times // CHECK-NEXT: Guid: 8657661246551306189 // CHECK-NEXT: Entries: 1 // CHECK-NEXT: 2 counters and 3 callsites @@ -135,6 +146,8 @@ class TestProfileWriter : public ProfileWriter { // CHECK-NEXT: Counter values: 2 1 // CHECK-NEXT: Exited Context Section // CHECK-NEXT: Entered Flat Section +// This is `skip_me`. Entered 3 times via `someFunction` +// CHECK-NEXT: Flat: 17928815489886282963 3 // CHECK-NEXT: Flat: 6759619411192316602 3,1 // This is flatFct (guid: 14569438697463215220) // CHECK-NEXT: Flat: 14569438697463215220 1,2 diff --git a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h index 55962df57fb58..a176662b5cb3d 100644 --- a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h +++ b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h @@ -121,6 +121,7 @@ class ProfileWriter { public: virtual void startContextSection() = 0; virtual void writeContextual(const ctx_profile::ContextNode &RootNode, + const ctx_profile::ContextNode *Unhandled, uint64_t TotalRootEntryCount) = 0; virtual void endContextSection() = 0; diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index 65be54323998b..37c2f3d856c73 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -74,6 +74,12 @@ class IndexNode { }; } // namespace internal +// Setting initial capacity to 1 because all contexts must have at least 1 +// counter, and then, because all contexts belonging to a function have the same +// size, there'll be at most one other heap allocation. +using CtxProfFlatProfile = + std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>; + /// A node (context) in the loaded contextual profile, suitable for mutation /// during IPO passes. We generally expect a fraction of counters and /// callsites to be populated. We continue to model counters as vectors, but @@ -93,11 +99,16 @@ class PGOCtxProfContext final : public internal::IndexNode { GlobalValue::GUID GUID = 0; SmallVector<uint64_t, 16> Counters; const std::optional<uint64_t> RootEntryCount; + std::optional<CtxProfFlatProfile> Unhandled{}; CallsiteMapTy Callsites; - PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters, - std::optional<uint64_t> RootEntryCount = std::nullopt) - : GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) { + PGOCtxProfContext( + GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters, + std::optional<uint64_t> RootEntryCount = std::nullopt, + std::optional<CtxProfFlatProfile> &&Unhandled = std::nullopt) + : GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount), + Unhandled(std::move(Unhandled)) { + assert(RootEntryCount.has_value() == Unhandled.has_value()); } Expected<PGOCtxProfContext &> @@ -121,6 +132,8 @@ class PGOCtxProfContext final : public internal::IndexNode { bool isRoot() const { return RootEntryCount.has_value(); } uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); } + const CtxProfFlatProfile &getUnhandled() const { return Unhandled.value(); } + uint64_t getEntrycount() const { assert(!Counters.empty() && "Functions are expected to have at their entry BB instrumented, so " @@ -170,12 +183,6 @@ class PGOCtxProfContext final : public internal::IndexNode { } }; -// Setting initial capacity to 1 because all contexts must have at least 1 -// counter, and then, because all contexts belonging to a function have the same -// size, there'll be at most one other heap allocation. -using CtxProfFlatProfile = - std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>; - using CtxProfContextualProfiles = std::map<GlobalValue::GUID, PGOCtxProfContext>; struct PGOCtxProfile { @@ -205,6 +212,7 @@ class PGOCtxProfileReader final { Error loadContexts(CtxProfContextualProfiles &P); Error loadFlatProfiles(CtxProfFlatProfile &P); + Error loadFlatProfileList(CtxProfFlatProfile &P); public: PGOCtxProfileReader(StringRef Buffer) diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h index b2bb8fea10cfe..36ad5622a8544 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h @@ -36,7 +36,8 @@ enum PGOCtxProfileBlockIDs { ContextNodeBlockID = ContextRootBlockID + 1, FlatProfilesSectionBlockID = ContextNodeBlockID + 1, FlatProfileBlockID = FlatProfilesSectionBlockID + 1, - LAST_VALID = FlatProfileBlockID + UnhandledBlockID = FlatProfileBlockID + 1, + LAST_VALID = UnhandledBlockID }; /// Write one or more ContextNodes to the provided raw_fd_stream. @@ -94,6 +95,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter { void startContextSection() override; void writeContextual(const ctx_profile::ContextNode &RootNode, + const ctx_profile::ContextNode *Unhandled, uint64_t TotalRootEntryCount) override; void endContextSection() override; @@ -104,7 +106,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter { // constants used in writing which a reader may find useful. static constexpr unsigned CodeLen = 2; - static constexpr uint32_t CurrentVersion = 3; + static constexpr uint32_t CurrentVersion = 4; static constexpr unsigned VBREncodingBits = 6; static constexpr StringRef ContainerMagic = "CTXP"; }; diff --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp index f53f2956a7b7e..ec801d43c8588 100644 --- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp +++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp @@ -97,7 +97,7 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) { std::optional<SmallVector<uint64_t, 16>> Counters; std::optional<uint32_t> CallsiteIndex; std::optional<uint64_t> TotalEntryCount; - + std::optional<CtxProfFlatProfile> Unhandled; SmallVector<uint64_t, 1> RecordValues; const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID; @@ -108,14 +108,24 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) { auto GotAllWeNeed = [&]() { return Guid.has_value() && Counters.has_value() && (!ExpectIndex || CallsiteIndex.has_value()) && - (!IsRoot || TotalEntryCount.has_value()); + (!IsRoot || TotalEntryCount.has_value()) && + (!IsRoot || Unhandled.has_value()); }; + while (!GotAllWeNeed()) { RecordValues.clear(); EXPECT_OR_RET(Entry, advance()); - if (Entry->Kind != BitstreamEntry::Record) + if (Entry->Kind != BitstreamEntry::Record) { + if (IsRoot && Entry->Kind == BitstreamEntry::SubBlock && + Entry->ID == PGOCtxProfileBlockIDs::UnhandledBlockID) { + RET_ON_ERR(enterBlockWithID(PGOCtxProfileBlockIDs::UnhandledBlockID)); + Unhandled = CtxProfFlatProfile(); + RET_ON_ERR(loadFlatProfileList(*Unhandled)); + continue; + } return wrongValue( "Expected records before encountering more subcontexts"); + } EXPECT_OR_RET(ReadRecord, Cursor.readRecord(bitc::UNABBREV_RECORD, RecordValues)); switch (*ReadRecord) { @@ -152,7 +162,8 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) { } } - PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount); + PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount, + std::move(Unhandled)); while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) { EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID)); @@ -212,9 +223,7 @@ Error PGOCtxProfileReader::loadContexts(CtxProfContextualProfiles &P) { return Error::success(); } -Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) { - RET_ON_ERR( - enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID)); +Error PGOCtxProfileReader::loadFlatProfileList(CtxProfFlatProfile &P) { while (canEnterBlockWithID(PGOCtxProfileBlockIDs::FlatProfileBlockID)) { EXPECT_OR_RET(E, readProfile(PGOCtxProfileBlockIDs::FlatProfileBlockID)); auto Guid = E->second.guid(); @@ -224,6 +233,12 @@ Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) { return Error::success(); } +Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) { + RET_ON_ERR( + enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID)); + return loadFlatProfileList(P); +} + Expected<PGOCtxProfile> PGOCtxProfileReader::loadProfiles() { RET_ON_ERR(readMetadata()); PGOCtxProfile Ret; @@ -287,10 +302,13 @@ void toYaml(yaml::Output &Out, Out.endSequence(); } +void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat); + void toYaml(yaml::Output &Out, GlobalValue::GUID Guid, const SmallVectorImpl<uint64_t> &Counters, const PGOCtxProfContext::CallsiteMapTy &Callsites, - std::optional<uint64_t> TotalRootEntryCount = std::nullopt) { + std::optional<uint64_t> TotalRootEntryCount = std::nullopt, + CtxProfFlatProfile Unhandled = {}) { yaml::EmptyContext Empty; Out.beginMapping(); void *SaveInfo = nullptr; @@ -318,6 +336,14 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid, Out.endFlowSequence(); Out.postflightKey(nullptr); } + + if (!Unhandled.empty()) { + assert(TotalRootEntryCount.has_value()); + Out.preflightKey("Unhandled", false, false, UseDefault, SaveInfo); + toYaml(Out, Unhandled); + Out.postflightKey(nullptr); + } + if (!Callsites.empty()) { Out.preflightKey("Callsites", true, false, UseDefault, SaveInfo); toYaml(Out, Callsites); @@ -326,10 +352,22 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid, Out.endMapping(); } +void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat) { + void *SaveInfo = nullptr; + Out.beginSequence(); + size_t ElemID = 0; + for (const auto &[Guid, Counters] : Flat) { + Out.preflightElement(ElemID++, SaveInfo); + toYaml(Out, Guid, Counters, {}); + Out.postflightElement(nullptr); + } + Out.endSequence(); +} + void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) { if (Ctx.isRoot()) toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(), - Ctx.getTotalRootEntryCount()); + Ctx.getTotalRootEntryCount(), Ctx.getUnhandled()); else toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites()); } @@ -348,14 +386,7 @@ void llvm::convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile) { } if (!Profile.FlatProfiles.empty()) { Out.preflightKey("FlatProfiles", false, false, UseDefault, SaveInfo); - Out.beginSequence(); - size_t ElemID = 0; - for (const auto &[Guid, Counters] : Profile.FlatProfiles) { - Out.preflightElement(ElemID++, SaveInfo); - toYaml(Out, Guid, Counters, {}); - Out.postflightElement(nullptr); - } - Out.endSequence(); + toYaml(Out, Profile.FlatProfiles); Out.postflightKey(nullptr); } Out.endMapping(); diff --git a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp index 910842647591d..0fdf90e5554b0 100644 --- a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp +++ b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp @@ -58,6 +58,7 @@ PGOCtxProfileWriter::PGOCtxProfileWriter( DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount, "TotalRootEntryCount"); DescribeRecord(PGOCtxProfileRecords::Counters, "Counters"); + DescribeBlock(PGOCtxProfileBlockIDs::UnhandledBlockID, "Unhandled"); DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context"); DescribeRecord(PGOCtxProfileRecords::Guid, "GUID"); DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex"); @@ -135,6 +136,7 @@ void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); } void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); } void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode, + const ContextNode *Unhandled, uint64_t TotalRootEntryCount) { if (!IncludeEmpty && (!TotalRootEntryCount || (RootNode.counters_size() > 0 && RootNode.entrycount() == 0))) @@ -143,6 +145,12 @@ void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode, writeGuid(RootNode.guid()); writeRootEntryCount(TotalRootEntryCount); writeCounters({RootNode.counters(), RootNode.counters_size()}); + + Writer.EnterSubblock(PGOCtxProfileBlockIDs::UnhandledBlockID, CodeLen); + for (const auto *P = Unhandled; P; P = P->next()) + writeFlat(P->guid(), P->counters(), P->counters_size()); + Writer.ExitBlock(); + writeSubcontexts(RootNode); Writer.ExitBlock(); } @@ -159,6 +167,9 @@ namespace { /// Representation of the context node suitable for yaml serialization / /// deserialization. +using SerializableFlatProfileRepresentation = + std::pair<ctx_profile::GUID, std::vector<uint64_t>>; + struct SerializableCtxRepresentation { ctx_profile::GUID Guid = 0; std::vector<uint64_t> Counters; @@ -167,11 +178,9 @@ struct SerializableCtxRepresentation { struct SerializableRootRepresentation : public SerializableCtxRepresentation { uint64_t TotalRootEntryCount = 0; + std::vector<SerializableFlatProfileRepresentation> Unhandled; }; -using SerializableFlatProfileRepresentation = - std::pair<ctx_profile::GUID, std::vector<uint64_t>>; - struct SerializableProfileRepresentation { std::vector<SerializableRootRepresentation> Contexts; std::vector<SerializableFlatProfileRepresentation> FlatProfiles; @@ -228,6 +237,7 @@ template <> struct yaml::MappingTraits<SerializableRootRepresentation> { static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) { yaml::MappingTraits<SerializableCtxRepresentation>::mapping(IO, R); IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount); + IO.mapOptional("Unhandled", R.Unhandled); } }; @@ -265,7 +275,15 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) { if (!TopList) return createStringError( "Unexpected error converting internal structure to ctx profile"); - Writer.writeContextual(*TopList, DC.TotalRootEntryCount); + + ctx_profile::ContextNode *FirstUnhandled = nullptr; + for (const auto &U : DC.Unhandled) { + SerializableCtxRepresentation Unhandled; + Unhandled.Guid = U.first; + Unhandled.Counters = U.second; + FirstUnhandled = createNode(Nodes, Unhandled, FirstUnhandled); + } + Writer.writeContextual(*TopList, FirstUnhandled, DC.TotalRootEntryCount); } Writer.endContextSection(); } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index a8055979acaa2..44f8eb36d0b1e 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" @@ -338,6 +339,11 @@ cl::opt<bool> PGOInstrumentColdFunctionOnly( "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden, cl::desc("Enable cold function only instrumentation.")); +cl::list<std::string> CtxPGOSkipCallsiteInstrument( + "ctx-prof-skip-callsite-instr", cl::Hidden, + cl::desc("Do not instrument callsites to functions in this list. Intended " + "for testing.")); + extern cl::opt<unsigned> MaxNumVTableAnnotations; namespace llvm { @@ -957,6 +963,10 @@ void FunctionInstrumenter::instrument() { InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); if (IsCtxProf) { + StringSet<> SkipCSInstr; + SkipCSInstr.insert(CtxPGOSkipCallsiteInstrument.begin(), + CtxPGOSkipCallsiteInstrument.end()); + auto *CSIntrinsic = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite); // We want to count the instrumentable callsites, then instrument them. This @@ -973,6 +983,9 @@ void FunctionInstrumenter::instrument() { if (auto *CS = dyn_cast<CallBase>(&Instr)) { if (!InstrProfCallsite::canInstrumentCallsite(*CS)) continue; + if (CS->getCalledFunction() && + SkipCSInstr.contains(CS->getCalledFunction()->getName())) + continue; Visitor(CS); } }; diff --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml new file mode 100644 index 0000000000000..a5eaf18e7718c --- /dev/null +++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml @@ -0,0 +1,26 @@ + +Contexts: + - Guid: 1000 + TotalRootEntryCount: 5 + Counters: [ 1, 2, 3 ] + Callsites: + - [ ] + - - Guid: 2000 + Counters: [ 4, 5 ] + - Guid: 18446744073709551613 + Counters: [ 6, 7, 8 ] + - - Guid: 3000 + Counters: [ 40, 50 ] + - Guid: 18446744073709551612 + TotalRootEntryCount: 45 + Counters: [ 5, 9, 10 ] + Unhandled: + - Guid: 555 + Counters: [ 1, 2, 3 ] + - Guid: 2000 + Counters: [ 1, 2 ] +FlatProfiles: + - Guid: 1234 + Counters: [ 5, 6, 7 ] + - Guid: 5555 + Counters: [ 1 ] diff --git a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test index 937bf89532d35..428f1db06cfd1 100644 --- a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test +++ b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test @@ -23,6 +23,11 @@ ; RUN: llvm-ctxprof-util toYAML -input %t/valid-flat-first.bitstream -output %t/valid-flat-first.yaml ; RUN: diff %t/valid-flat-first.yaml %S/Inputs/valid.yaml +; A variant with unhandled contexts: +; RUN: llvm-ctxprof-util fromYAML -input %S/Inputs/valid-with-unhandled.yaml -output %t/valid-with-unhandled.bitstream +; RUN: llvm-ctxprof-util toYAML -input %t/valid-with-unhandled.bitstream -output %t/valid-with-unhandled.yaml +; RUN: diff %t/valid-with-unhandled.yaml %S/Inputs/valid-with-unhandled.yaml + ; For the valid case, check against a reference output. ; Note that uint64_t are printed as signed values by llvm-bcanalyzer: ; * 18446744073709551613 in yaml is -3 in the output @@ -33,17 +38,19 @@ ; EMPTY: <BLOCKINFO_BLOCK/> ; EMPTY-NEXT: <Metadata NumWords=1 BlockCodeSize=2> -; EMPTY-NEXT: <Version op0=3/> +; EMPTY-NEXT: <Version op0=4/> ; EMPTY-NEXT: </Metadata> ; VALID: <BLOCKINFO_BLOCK/> -; VALID-NEXT: <Metadata NumWords=46 BlockCodeSize=2> -; VALID-NEXT: <Version op0=3/> -; VALID-NEXT: <Contexts NumWords=30 BlockCodeSize=2> -; VALID-NEXT: <Root NumWords=20 BlockCodeSize=2> +; VALID-NEXT: <Metadata NumWords=53 BlockCodeSize=2> +; VALID-NEXT: <Version op0=4/> +; VALID-NEXT: <Contexts NumWords=37 BlockCodeSize=2> +; VALID-NEXT: <Root NumWords=23 BlockCodeSize=2> ; VALID-NEXT: <GUID op0=1000/> ; VALID-NEXT: <TotalRootEntryCount op0=5/> ; VALID-NEXT: <Counters op0=1 op1=2 op2=3/> +; VALID-NEXT: <Unhandled NumWords=1 BlockCodeSize=2> +; VALID-NEXT: </Unhandled> ; VALID-NEXT: <Context NumWords=5 BlockCodeSize=2> ; VALID-NEXT: <GUID op0=-3/> ; VALID-NEXT: <CalleeIndex op0=1/> @@ -60,10 +67,12 @@ ; VALID-NEXT: <Counters op0=40 op1=50/> ; VALID-NEXT: </Context> ; VALID-NEXT: </Root> -; VALID-NEXT: <Root NumWords=5 BlockCodeSize=2> +; VALID-NEXT: <Root NumWords=9 BlockCodeSize=2> ; VALID-NEXT: <GUID op0=-4/> ; VALID-NEXT: <TotalRootEntryCount op0=45/> ; VALID-NEXT: <Counters op0=5 op1=9 op2=10/> +; VALID-NEXT: <Unhandled NumWords=1 BlockCodeSize=2> +; VALID-NEXT: </Unhandled> ; VALID-NEXT: </Root> ; VALID-NEXT: </Contexts> ; VALID-NEXT: <FlatProfiles NumWords=10 BlockCodeSize=2> diff --git a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp index 541667253bfde..426008f1038bb 100644 --- a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp +++ b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp @@ -103,7 +103,7 @@ TEST_F(PGOCtxProfRWTest, RoundTrip) { PGOCtxProfileWriter Writer(Out); Writer.startContextSection(); for (auto &[_, R] : roots()) - Writer.writeContextual(*R, 1); + Writer.writeContextual(*R, nullptr, 1); Writer.endContextSection(); } } @@ -155,7 +155,7 @@ TEST_F(PGOCtxProfRWTest, InvalidCounters) { { PGOCtxProfileWriter Writer(Out); Writer.startContextSection(); - Writer.writeContextual(*R, 2); + Writer.writeContextual(*R, nullptr, 2); Writer.endContextSection(); } } @@ -181,7 +181,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZero) { { PGOCtxProfileWriter Writer(Out); Writer.startContextSection(); - Writer.writeContextual(*R, 42); + Writer.writeContextual(*R, nullptr, 42); Writer.endContextSection(); } } @@ -208,7 +208,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZeroWithOverride) { PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt, /*IncludeEmpty=*/true); Writer.startContextSection(); - Writer.writeContextual(*R, 8); + Writer.writeContextual(*R, nullptr, 8); Writer.endContextSection(); } } @@ -293,8 +293,8 @@ TEST_F(PGOCtxProfRWTest, DuplicateRoots) { PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt, /*IncludeEmpty=*/true); Writer.startContextSection(); - Writer.writeContextual(*createNode(1, 1, 1), 1); - Writer.writeContextual(*createNode(1, 1, 1), 1); + Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1); + Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1); Writer.endContextSection(); } } @@ -322,7 +322,7 @@ TEST_F(PGOCtxProfRWTest, DuplicateTargets) { R->subContexts()[0] = L2; PGOCtxProfileWriter Writer(Out); Writer.startContextSection(); - Writer.writeContextual(*R, 1); + Writer.writeContextual(*R, nullptr, 1); Writer.endContextSection(); } } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits