https://github.com/mingmingl-llvm created https://github.com/llvm/llvm-project/pull/139997
None >From 75878647c2c36cca00e9d003dc84bf4597e19187 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Tue, 13 May 2025 22:54:59 -0700 Subject: [PATCH] [StaticDataLayout][PGO]Implement reader and writer change for data access profiles --- .../include/llvm/ProfileData/DataAccessProf.h | 12 +++- .../llvm/ProfileData/IndexedMemProfData.h | 12 +++- .../llvm/ProfileData/InstrProfReader.h | 6 +- .../llvm/ProfileData/InstrProfWriter.h | 6 ++ llvm/include/llvm/ProfileData/MemProfReader.h | 12 ++++ llvm/include/llvm/ProfileData/MemProfYAML.h | 65 +++++++++++++++++++ llvm/lib/ProfileData/DataAccessProf.cpp | 6 +- llvm/lib/ProfileData/IndexedMemProfData.cpp | 61 +++++++++++++---- llvm/lib/ProfileData/InstrProfReader.cpp | 14 ++++ llvm/lib/ProfileData/InstrProfWriter.cpp | 20 ++++-- llvm/lib/ProfileData/MemProfReader.cpp | 34 ++++++++++ .../tools/llvm-profdata/memprof-yaml.test | 11 ++++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 5 ++ .../ProfileData/DataAccessProfTest.cpp | 11 ++-- 14 files changed, 244 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h index e8504102238d1..f5f6abf0a2817 100644 --- a/llvm/include/llvm/ProfileData/DataAccessProf.h +++ b/llvm/include/llvm/ProfileData/DataAccessProf.h @@ -41,6 +41,8 @@ namespace data_access_prof { struct SourceLocation { SourceLocation(StringRef FileNameRef, uint32_t Line) : FileName(FileNameRef.str()), Line(Line) {} + + SourceLocation() {} /// The filename where the data is located. std::string FileName; /// The line number in the source code. @@ -53,6 +55,8 @@ namespace internal { // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData` // to represent data locations internally. struct SourceLocationRef { + SourceLocationRef(StringRef FileNameRef, uint32_t Line) + : FileName(FileNameRef), Line(Line) {} // The filename where the data is located. StringRef FileName; // The line number in the source code. @@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>; /// The data access profiles for a symbol. struct DataAccessProfRecord { public: - DataAccessProfRecord(SymbolHandleRef SymHandleRef, - ArrayRef<internal::SourceLocationRef> LocRefs) { + DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount, + ArrayRef<internal::SourceLocationRef> LocRefs) + : AccessCount(AccessCount) { if (std::holds_alternative<StringRef>(SymHandleRef)) { SymHandle = std::get<StringRef>(SymHandleRef).str(); } else @@ -110,8 +115,9 @@ struct DataAccessProfRecord { for (auto Loc : LocRefs) Locations.push_back(SourceLocation(Loc.FileName, Loc.Line)); } + DataAccessProfRecord() {} SymbolHandle SymHandle; - + uint64_t AccessCount; // The locations of data in the source code. Optional. SmallVector<SourceLocation> Locations; }; diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h index 3c6c329d1c49d..66fa38472059b 100644 --- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h +++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h @@ -10,14 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" +#include <functional> +#include <optional> + namespace llvm { // Write the MemProf data to OS. -Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion MemProfVersionRequested, - bool MemProfFullSchema); +Error writeMemProf( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData); } // namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index c250a9ede39bc..210df6be46f04 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/Object/BuildID.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" @@ -704,9 +705,12 @@ class IndexedMemProfReader { // The number of elements in the radix tree array. unsigned RadixTreeSize = 0; + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; + Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr); Error deserializeRadixTreeBased(const unsigned char *Start, - const unsigned char *Ptr); + const unsigned char *Ptr, + memprof::IndexedVersion Version); public: IndexedMemProfReader() = default; diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 67d85daa81623..cf1cec25c3cac 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/BuildID.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Error.h" @@ -81,6 +82,8 @@ class InstrProfWriter { // Whether to generated random memprof hotness for testing. bool MemprofGenerateRandomHotness; + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; + public: // For memprof testing, random hotness can be assigned to the contexts if // MemprofGenerateRandomHotness is enabled. The random seed can be either @@ -122,6 +125,9 @@ class InstrProfWriter { // Add a binary id to the binary ids list. void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs); + void addDataAccessProfData( + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref<void(Error)> Warn); diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 29d9e57cae3e3..3ff5cfe76b6cb 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -50,6 +50,11 @@ class MemProfReader { // MemProfReader no longer owns the MemProf profile. IndexedMemProfData takeMemProfData() { return std::move(MemProfData); } + std::unique_ptr<data_access_prof::DataAccessProfData> + takeDataAccessProfData() { + return std::move(DataAccessProfileData); + } + virtual Error readNextRecord(GuidMemProfRecordPair &GuidRecord, std::function<const Frame(const FrameId)> Callback = nullptr) { @@ -86,6 +91,11 @@ class MemProfReader { MemProfReader(IndexedMemProfData &&MemProfData) : MemProfData(std::move(MemProfData)) {} + void setDataAccessProfileData( + std::unique_ptr<data_access_prof::DataAccessProfData> Data) { + DataAccessProfileData = std::move(Data); + } + protected: // A helper method to extract the frame from the IdToFrame map. const Frame &idToFrame(const FrameId Id) const { @@ -97,6 +107,8 @@ class MemProfReader { IndexedMemProfData MemProfData; // An iterator to the internal function profile data structure. llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; + + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; }; // Map from id (recorded from sanitizer stack depot) to virtual addresses for diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h index 08dee253f615a..8db73f4bb5a1b 100644 --- a/llvm/include/llvm/ProfileData/MemProfYAML.h +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -2,6 +2,7 @@ #define LLVM_PROFILEDATA_MEMPROFYAML_H_ #include "llvm/ADT/SmallVector.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Format.h" #include "llvm/Support/YAMLTraits.h" @@ -12,6 +13,9 @@ namespace memprof { // serialized and deserialized in YAML. LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) +LLVM_YAML_STRONG_TYPEDEF(uint64_t, SymbolContentHash) +LLVM_YAML_STRONG_TYPEDEF(std::string, OwnedSymbolName) + // Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields // within MemProfRecord at the same level as if the GUID were part of // MemProfRecord. @@ -20,9 +24,25 @@ struct GUIDMemProfRecordPair { MemProfRecord Record; }; +// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct +// members use owned strings. This is for simplicity and assumes that most real +// world use cases do look-ups and regression test scale is small, so string +// efficiency is not a priority. +struct YamlDataAccessProfData { + std::vector<data_access_prof::DataAccessProfRecord> Records; + std::vector<uint64_t> KnownColdHashes; + std::vector<std::string> KnownColdSymbols; + + bool isEmpty() const { + return Records.empty() && KnownColdHashes.empty() && + KnownColdSymbols.empty(); + } +}; + // The top-level data structure, only used with YAML for now. struct AllMemProfData { std::vector<GUIDMemProfRecordPair> HeapProfileRecords; + YamlDataAccessProfData YamlifiedDataAccessProfiles; }; } // namespace memprof @@ -206,9 +226,50 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> { } }; +template <> struct MappingTraits<data_access_prof::SourceLocation> { + static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) { + Io.mapOptional("FileName", Loc.FileName); + Io.mapOptional("Line", Loc.Line); + } +}; + +template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> { + static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) { + if (Io.outputting()) { + if (std::holds_alternative<std::string>(Rec.SymHandle)) { + Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle)); + } else { + Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle)); + } + } else { + std::string SymName; + uint64_t Hash = 0; + Io.mapOptional("Symbol", SymName); + Io.mapOptional("Hash", Hash); + if (!SymName.empty()) { + Rec.SymHandle = SymName; + } else { + Rec.SymHandle = Hash; + } + } + + Io.mapOptional("Locations", Rec.Locations); + } +}; + +template <> struct MappingTraits<memprof::YamlDataAccessProfData> { + static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) { + Io.mapOptional("SampledRecords", Data.Records); + Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols); + Io.mapOptional("KnownColdHashes", Data.KnownColdHashes); + } +}; + template <> struct MappingTraits<memprof::AllMemProfData> { static void mapping(IO &Io, memprof::AllMemProfData &Data) { Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); + + Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles); } }; @@ -234,5 +295,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids +LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SymbolContentHash) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::OwnedSymbolName) #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_ diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp index c5d0099977cfa..61a73fab7269f 100644 --- a/llvm/lib/ProfileData/DataAccessProf.cpp +++ b/llvm/lib/ProfileData/DataAccessProf.cpp @@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const { auto It = Records.find(Key); if (It != Records.end()) { - return DataAccessProfRecord(Key, It->second.Locations); + return DataAccessProfRecord(Key, It->second.AccessCount, + It->second.Locations); } return std::nullopt; @@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples( auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID)); if (!CanonicalName) return CanonicalName.takeError(); - KnownColdSymbols.insert(*CanonicalName); + KnownColdSymbols.insert( + saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first); return Error::success(); } diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp index 3d20f7a7a5778..cc1b03101c880 100644 --- a/llvm/lib/ProfileData/IndexedMemProfData.cpp +++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/MemProf.h" @@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS, static Error writeMemProfRadixTreeBased( ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion Version, bool MemProfFullSchema) { + memprof::IndexedVersion Version, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { assert((Version == memprof::Version3 || Version == memprof::Version4) && "Unsupported version for radix tree format"); @@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased( OS.write(0ULL); // Reserve space for the memprof call stack payload offset. OS.write(0ULL); // Reserve space for the memprof record payload offset. OS.write(0ULL); // Reserve space for the memprof record table offset. + if (Version == memprof::Version4) + OS.write(0ULL); // Reserve space for the data access profile offset. auto Schema = memprof::getHotColdSchema(); if (MemProfFullSchema) @@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased( uint64_t RecordTableOffset = writeMemProfRecords( OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes); + uint64_t DataAccessProfOffset = 0; + if (DataAccessProfileData.has_value()) { + DataAccessProfOffset = OS.tell(); + if (Error E = (*DataAccessProfileData).get().serialize(OS)) + return E; + } + // Verify that the computation for the number of elements in the call stack // array works. assert(CallStackPayloadOffset + NumElements * sizeof(memprof::LinearFrameId) == RecordPayloadOffset); - uint64_t Header[] = { + SmallVector<uint64_t, 4> Header = { CallStackPayloadOffset, RecordPayloadOffset, RecordTableOffset, }; + if (Version == memprof::Version4) + Header.push_back(DataAccessProfOffset); OS.patch({{HeaderUpdatePos, Header}}); return Error::success(); @@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, bool MemProfFullSchema) { return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3, - MemProfFullSchema); + MemProfFullSchema, std::nullopt); } // Write out MemProf Version4 -static Error writeMemProfV4(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData, - bool MemProfFullSchema) { +static Error writeMemProfV4( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4, - MemProfFullSchema); + MemProfFullSchema, DataAccessProfileData); } // Write out the MemProf data in a requested version. -Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion MemProfVersionRequested, - bool MemProfFullSchema) { +Error writeMemProf( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { switch (MemProfVersionRequested) { case memprof::Version2: return writeMemProfV2(OS, MemProfData, MemProfFullSchema); case memprof::Version3: return writeMemProfV3(OS, MemProfData, MemProfFullSchema); case memprof::Version4: - return writeMemProfV4(OS, MemProfData, MemProfFullSchema); + return writeMemProfV4(OS, MemProfData, MemProfFullSchema, + DataAccessProfileData); } return make_error<InstrProfError>( @@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start, } Error IndexedMemProfReader::deserializeRadixTreeBased( - const unsigned char *Start, const unsigned char *Ptr) { + const unsigned char *Start, const unsigned char *Ptr, + memprof::IndexedVersion Version) { + assert((Version == memprof::Version3 || Version == memprof::Version4) && + "Unsupported version for radix tree format"); // The offset in the stream right before invoking // CallStackTableGenerator.Emit. const uint64_t CallStackPayloadOffset = @@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( const uint64_t RecordTableOffset = support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + uint64_t DataAccessProfOffset = 0; + if (Version == memprof::Version4) + DataAccessProfOffset = + support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + // Read the schema. auto SchemaOr = memprof::readMemProfSchema(Ptr); if (!SchemaOr) @@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( /*Payload=*/Start + RecordPayloadOffset, /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema))); + if (DataAccessProfOffset > RecordTableOffset) { + DataAccessProfileData = + std::make_unique<data_access_prof::DataAccessProfData>(); + const unsigned char *DAPPtr = Start + DataAccessProfOffset; + if (Error E = DataAccessProfileData->deserialize(DAPPtr)) + return E; + } + return Error::success(); } @@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, case memprof::Version3: case memprof::Version4: // V3 and V4 share the same high-level structure (radix tree, linear IDs). - if (Error E = deserializeRadixTreeBased(Start, Ptr)) + if (Error E = deserializeRadixTreeBased(Start, Ptr, Version)) return E; break; } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index e6c83430cd8e9..696a2518e148e 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1551,6 +1551,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const { Pair.Record = std::move(*Record); AllMemProfData.HeapProfileRecords.push_back(std::move(Pair)); } + // Populate the data access profiles. + if (DataAccessProfileData != nullptr) { + for (const auto &[SymHandleRef, RecordRef] : + DataAccessProfileData->getRecords()) + AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back( + data_access_prof::DataAccessProfRecord( + SymHandleRef, RecordRef.AccessCount, RecordRef.Locations)); + for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols()) + AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back( + ColdSymbol.str()); + for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes()) + AllMemProfData.YamlifiedDataAccessProfiles.KnownColdHashes.push_back( + Hash); + } return AllMemProfData; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 2759346935b14..7217826d550a7 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" @@ -29,6 +30,7 @@ #include "llvm/Support/raw_ostream.h" #include <cstdint> #include <ctime> +#include <functional> #include <memory> #include <string> #include <tuple> @@ -152,9 +154,7 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) { InfoObj->ValueProfDataEndianness = Endianness; } -void InstrProfWriter::setOutputSparse(bool Sparse) { - this->Sparse = Sparse; -} +void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, function_ref<void(Error)> Warn) { @@ -329,6 +329,12 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) { llvm::append_range(BinaryIds, BIs); } +void InstrProfWriter::addDataAccessProfData( + std::unique_ptr<data_access_prof::DataAccessProfData> + DataAccessProfDataIn) { + DataAccessProfileData = std::move(DataAccessProfDataIn); +} + void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength); assert(!Trace.FunctionNameRefs.empty()); @@ -614,8 +620,14 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { uint64_t MemProfSectionStart = 0; if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) { MemProfSectionStart = OS.tell(); + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DAP = std::nullopt; + if (DataAccessProfileData.get() != nullptr) { + DAP = std::ref(*DataAccessProfileData.get()); + } if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested, - MemProfFullSchema)) + MemProfFullSchema, DAP)) + return E; } diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index e0f280b9eb2f6..1562b7d5bb01b 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -823,6 +824,39 @@ void YAMLMemProfReader::parse(StringRef YAMLData) { MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord)); } + + if (Doc.YamlifiedDataAccessProfiles.isEmpty()) + return; + + auto ToSymHandleRef = [](data_access_prof::SymbolHandle Handle) + -> data_access_prof::SymbolHandleRef { + if (std::holds_alternative<std::string>(Handle)) + return StringRef(std::get<std::string>(Handle)); + return std::get<uint64_t>(Handle); + }; + + auto DataAccessProfileData = + std::make_unique<data_access_prof::DataAccessProfData>(); + for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records) { + if (Error E = DataAccessProfileData->setDataAccessProfile( + ToSymHandleRef(Record.SymHandle), Record.AccessCount, + Record.Locations)) { + reportFatalInternalError(std::move(E)); + } + } + for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdHashes) + if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash)) { + reportFatalInternalError(std::move(E)); + } + for (const std::string &Sym : + Doc.YamlifiedDataAccessProfiles.KnownColdSymbols) { + errs() << "Sym " << Sym << "\n"; + if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym)) { + reportFatalInternalError(std::move(E)); + } + } + + setDataAccessProfileData(std::move(DataAccessProfileData)); } } // namespace memprof } // namespace llvm diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test index 9766cc50f37d7..5e0c7fb3ea1d8 100644 --- a/llvm/test/tools/llvm-profdata/memprof-yaml.test +++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test @@ -35,4 +35,15 @@ HeapProfileRecords: - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true } - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false } CalleeGuids: [ 0x300 ] +DataAccessProfiles: + SampledRecords: + - Symbol: abcde + - Hash: 101010 + Locations: + - FileName: file + Line: 233 + KnownColdSymbols: + - foo + - bar + KnownColdHashes: [ 999, 1001 ] ... diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 885e06df6c390..87ddca230b304 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -16,6 +16,7 @@ #include "llvm/Debuginfod/HTTPClient.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Binary.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" @@ -756,6 +757,8 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, auto MemProfData = Reader->takeMemProfData(); + auto DataAccessProfData = Reader->takeDataAccessProfData(); + // Check for the empty input in case the YAML file is invalid. if (MemProfData.Records.empty()) { WC->Errors.emplace_back( @@ -764,6 +767,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, } WC->Writer.addMemProfData(std::move(MemProfData), MemProfError); + WC->Writer.addDataAccessProfData(std::move(DataAccessProfData)); return; } @@ -3308,6 +3312,7 @@ static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) { auto Reader = std::move(ReaderOrErr.get()); memprof::AllMemProfData Data = Reader->getAllMemProfData(); + // Construct yaml::Output with the maximum column width of 80 so that each // Frame fits in one line. yaml::Output Yout(OS, nullptr, 80); diff --git a/llvm/unittests/ProfileData/DataAccessProfTest.cpp b/llvm/unittests/ProfileData/DataAccessProfTest.cpp index 127230d4805e7..084a8e96cdafe 100644 --- a/llvm/unittests/ProfileData/DataAccessProfTest.cpp +++ b/llvm/unittests/ProfileData/DataAccessProfTest.cpp @@ -78,7 +78,7 @@ TEST(MemProf, DataAccessProfile) { // Test that symbol names and file names are stored in the input order. EXPECT_THAT( llvm::to_vector(llvm::make_first_range(Data.getStrToIndexMapRef())), - ElementsAre("foo", "bar.__uniq.321", "file2", "file1")); + ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", "file1")); EXPECT_THAT(Data.getKnownColdSymbols(), ElementsAre("sym2", "sym1")); EXPECT_THAT(Data.getKnownColdHashes(), ElementsAre(789, 678)); @@ -134,9 +134,10 @@ TEST(MemProf, DataAccessProfile) { testing::IsEmpty()); EXPECT_FALSE(deserializedData.deserialize(p)); - EXPECT_THAT(llvm::to_vector(llvm::make_first_range( - deserializedData.getStrToIndexMapRef())), - ElementsAre("foo", "bar.__uniq.321", "file2", "file1")); + EXPECT_THAT( + llvm::to_vector( + llvm::make_first_range(deserializedData.getStrToIndexMapRef())), + ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", "file1")); EXPECT_THAT(deserializedData.getKnownColdSymbols(), ElementsAre("sym2", "sym1")); EXPECT_THAT(deserializedData.getKnownColdHashes(), ElementsAre(789, 678)); @@ -158,7 +159,7 @@ TEST(MemProf, DataAccessProfile) { Field(&DataAccessProfRecordRef::AccessCount, 100), Field(&DataAccessProfRecordRef::IsStringLiteral, false), Field(&DataAccessProfRecordRef::Locations, testing::IsEmpty())), - AllOf(Field(&DataAccessProfRecordRef::SymbolID, 1), + AllOf(Field(&DataAccessProfRecordRef::SymbolID, 2), Field(&DataAccessProfRecordRef::AccessCount, 123), Field(&DataAccessProfRecordRef::IsStringLiteral, false), Field(&DataAccessProfRecordRef::Locations, _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits