llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-pgo Author: Mingming Liu (mingmingl-llvm) <details> <summary>Changes</summary> https://github.com/llvm/llvm-project/pull/138170 introduces classes to operate on data access profiles. This change supports the read and write of `DataAccessProfData` in indexed format of MemProf (v4) as well as its the text (yaml) format. For indexed format: * InstrProfWriter owns (by `std::unique_ptr<DataAccessProfData>`) the data access profiles, and gives a non-owned copy when it calls `writeMemProf`. * MemProf v4 header has a new `uint64_t` to record the byte offset of data access profiles. This `uint64_t` field is zero if data access profile is not set (nullptr). * MemProfReader reads the offset from v4 header and de-serializes in-memory bytes into class `DataAccessProfData`. * MemProfYAML.h adds the mapping for DAP class, and make DAP optional for both read and write. --- Patch is 26.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139997.diff 14 Files Affected: - (modified) llvm/include/llvm/ProfileData/DataAccessProf.h (+9-3) - (modified) llvm/include/llvm/ProfileData/IndexedMemProfData.h (+9-3) - (modified) llvm/include/llvm/ProfileData/InstrProfReader.h (+5-1) - (modified) llvm/include/llvm/ProfileData/InstrProfWriter.h (+6) - (modified) llvm/include/llvm/ProfileData/MemProfReader.h (+15) - (modified) llvm/include/llvm/ProfileData/MemProfYAML.h (+58) - (modified) llvm/lib/ProfileData/DataAccessProf.cpp (+4-2) - (modified) llvm/lib/ProfileData/IndexedMemProfData.cpp (+48-13) - (modified) llvm/lib/ProfileData/InstrProfReader.cpp (+14) - (modified) llvm/lib/ProfileData/InstrProfWriter.cpp (+16-4) - (modified) llvm/lib/ProfileData/MemProfReader.cpp (+30) - (modified) llvm/test/tools/llvm-profdata/memprof-yaml.test (+11) - (modified) llvm/tools/llvm-profdata/llvm-profdata.cpp (+4) - (modified) llvm/unittests/ProfileData/DataAccessProfTest.cpp (+6-5) ``````````diff diff --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h index e8504102238d1..f5f6abf0a2817 100644 --- a/llvm/include/llvm/ProfileData/DataAccessProf.h +++ b/llvm/include/llvm/ProfileData/DataAccessProf.h @@ -41,6 +41,8 @@ namespace data_access_prof { struct SourceLocation { SourceLocation(StringRef FileNameRef, uint32_t Line) : FileName(FileNameRef.str()), Line(Line) {} + + SourceLocation() {} /// The filename where the data is located. std::string FileName; /// The line number in the source code. @@ -53,6 +55,8 @@ namespace internal { // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData` // to represent data locations internally. struct SourceLocationRef { + SourceLocationRef(StringRef FileNameRef, uint32_t Line) + : FileName(FileNameRef), Line(Line) {} // The filename where the data is located. StringRef FileName; // The line number in the source code. @@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>; /// The data access profiles for a symbol. struct DataAccessProfRecord { public: - DataAccessProfRecord(SymbolHandleRef SymHandleRef, - ArrayRef<internal::SourceLocationRef> LocRefs) { + DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount, + ArrayRef<internal::SourceLocationRef> LocRefs) + : AccessCount(AccessCount) { if (std::holds_alternative<StringRef>(SymHandleRef)) { SymHandle = std::get<StringRef>(SymHandleRef).str(); } else @@ -110,8 +115,9 @@ struct DataAccessProfRecord { for (auto Loc : LocRefs) Locations.push_back(SourceLocation(Loc.FileName, Loc.Line)); } + DataAccessProfRecord() {} SymbolHandle SymHandle; - + uint64_t AccessCount; // The locations of data in the source code. Optional. SmallVector<SourceLocation> Locations; }; diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h index 3c6c329d1c49d..66fa38472059b 100644 --- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h +++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h @@ -10,14 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" +#include <functional> +#include <optional> + namespace llvm { // Write the MemProf data to OS. -Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion MemProfVersionRequested, - bool MemProfFullSchema); +Error writeMemProf( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData); } // namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index c250a9ede39bc..a3436e1dfe711 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/Object/BuildID.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" @@ -703,10 +704,13 @@ class IndexedMemProfReader { const unsigned char *CallStackBase = nullptr; // The number of elements in the radix tree array. unsigned RadixTreeSize = 0; + /// The data access profiles, deserialized from binary data. + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr); Error deserializeRadixTreeBased(const unsigned char *Start, - const unsigned char *Ptr); + const unsigned char *Ptr, + memprof::IndexedVersion Version); public: IndexedMemProfReader() = default; diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 67d85daa81623..cf1cec25c3cac 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/BuildID.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Error.h" @@ -81,6 +82,8 @@ class InstrProfWriter { // Whether to generated random memprof hotness for testing. bool MemprofGenerateRandomHotness; + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; + public: // For memprof testing, random hotness can be assigned to the contexts if // MemprofGenerateRandomHotness is enabled. The random seed can be either @@ -122,6 +125,9 @@ class InstrProfWriter { // Add a binary id to the binary ids list. void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs); + void addDataAccessProfData( + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile); + /// Merge existing function counts from the given writer. void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref<void(Error)> Warn); diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 29d9e57cae3e3..02defa189ea7e 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -228,6 +228,21 @@ class YAMLMemProfReader final : public MemProfReader { create(std::unique_ptr<MemoryBuffer> Buffer); void parse(StringRef YAMLData); + + std::unique_ptr<data_access_prof::DataAccessProfData> + takeDataAccessProfData() { + return std::move(DataAccessProfileData); + } + +private: + // Called by `parse` to set data access profiles after parsing them from Yaml + // files. + void setDataAccessProfileData( + std::unique_ptr<data_access_prof::DataAccessProfData> Data) { + DataAccessProfileData = std::move(Data); + } + + std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData; }; } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h index 08dee253f615a..634edc4aa0122 100644 --- a/llvm/include/llvm/ProfileData/MemProfYAML.h +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -2,6 +2,7 @@ #define LLVM_PROFILEDATA_MEMPROFYAML_H_ #include "llvm/ADT/SmallVector.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Format.h" #include "llvm/Support/YAMLTraits.h" @@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair { MemProfRecord Record; }; +// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct +// members use owned strings. This is for simplicity and assumes that most real +// world use cases do look-ups and regression test scale is small. +struct YamlDataAccessProfData { + std::vector<data_access_prof::DataAccessProfRecord> Records; + std::vector<uint64_t> KnownColdHashes; + std::vector<std::string> KnownColdSymbols; + + bool isEmpty() const { + return Records.empty() && KnownColdHashes.empty() && + KnownColdSymbols.empty(); + } +}; + // The top-level data structure, only used with YAML for now. struct AllMemProfData { std::vector<GUIDMemProfRecordPair> HeapProfileRecords; + YamlDataAccessProfData YamlifiedDataAccessProfiles; }; } // namespace memprof @@ -206,9 +222,49 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> { } }; +template <> struct MappingTraits<data_access_prof::SourceLocation> { + static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) { + Io.mapOptional("FileName", Loc.FileName); + Io.mapOptional("Line", Loc.Line); + } +}; + +template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> { + static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) { + if (Io.outputting()) { + if (std::holds_alternative<std::string>(Rec.SymHandle)) { + Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle)); + } else { + Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle)); + } + } else { + std::string SymName; + uint64_t Hash = 0; + Io.mapOptional("Symbol", SymName); + Io.mapOptional("Hash", Hash); + if (!SymName.empty()) { + Rec.SymHandle = SymName; + } else { + Rec.SymHandle = Hash; + } + } + + Io.mapOptional("Locations", Rec.Locations); + } +}; + +template <> struct MappingTraits<memprof::YamlDataAccessProfData> { + static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) { + Io.mapOptional("SampledRecords", Data.Records); + Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols); + Io.mapOptional("KnownColdHashes", Data.KnownColdHashes); + } +}; + template <> struct MappingTraits<memprof::AllMemProfData> { static void mapping(IO &Io, memprof::AllMemProfData &Data) { Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); + Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles); } }; @@ -234,5 +290,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids +LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation) #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_ diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp index c5d0099977cfa..61a73fab7269f 100644 --- a/llvm/lib/ProfileData/DataAccessProf.cpp +++ b/llvm/lib/ProfileData/DataAccessProf.cpp @@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const { auto It = Records.find(Key); if (It != Records.end()) { - return DataAccessProfRecord(Key, It->second.Locations); + return DataAccessProfRecord(Key, It->second.AccessCount, + It->second.Locations); } return std::nullopt; @@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples( auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID)); if (!CanonicalName) return CanonicalName.takeError(); - KnownColdSymbols.insert(*CanonicalName); + KnownColdSymbols.insert( + saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first); return Error::success(); } diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp index 3d20f7a7a5778..cc1b03101c880 100644 --- a/llvm/lib/ProfileData/IndexedMemProfData.cpp +++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/MemProf.h" @@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS, static Error writeMemProfRadixTreeBased( ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion Version, bool MemProfFullSchema) { + memprof::IndexedVersion Version, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { assert((Version == memprof::Version3 || Version == memprof::Version4) && "Unsupported version for radix tree format"); @@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased( OS.write(0ULL); // Reserve space for the memprof call stack payload offset. OS.write(0ULL); // Reserve space for the memprof record payload offset. OS.write(0ULL); // Reserve space for the memprof record table offset. + if (Version == memprof::Version4) + OS.write(0ULL); // Reserve space for the data access profile offset. auto Schema = memprof::getHotColdSchema(); if (MemProfFullSchema) @@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased( uint64_t RecordTableOffset = writeMemProfRecords( OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes); + uint64_t DataAccessProfOffset = 0; + if (DataAccessProfileData.has_value()) { + DataAccessProfOffset = OS.tell(); + if (Error E = (*DataAccessProfileData).get().serialize(OS)) + return E; + } + // Verify that the computation for the number of elements in the call stack // array works. assert(CallStackPayloadOffset + NumElements * sizeof(memprof::LinearFrameId) == RecordPayloadOffset); - uint64_t Header[] = { + SmallVector<uint64_t, 4> Header = { CallStackPayloadOffset, RecordPayloadOffset, RecordTableOffset, }; + if (Version == memprof::Version4) + Header.push_back(DataAccessProfOffset); OS.patch({{HeaderUpdatePos, Header}}); return Error::success(); @@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, bool MemProfFullSchema) { return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3, - MemProfFullSchema); + MemProfFullSchema, std::nullopt); } // Write out MemProf Version4 -static Error writeMemProfV4(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData, - bool MemProfFullSchema) { +static Error writeMemProfV4( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4, - MemProfFullSchema); + MemProfFullSchema, DataAccessProfileData); } // Write out the MemProf data in a requested version. -Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion MemProfVersionRequested, - bool MemProfFullSchema) { +Error writeMemProf( + ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, + std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> + DataAccessProfileData) { switch (MemProfVersionRequested) { case memprof::Version2: return writeMemProfV2(OS, MemProfData, MemProfFullSchema); case memprof::Version3: return writeMemProfV3(OS, MemProfData, MemProfFullSchema); case memprof::Version4: - return writeMemProfV4(OS, MemProfData, MemProfFullSchema); + return writeMemProfV4(OS, MemProfData, MemProfFullSchema, + DataAccessProfileData); } return make_error<InstrProfError>( @@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start, } Error IndexedMemProfReader::deserializeRadixTreeBased( - const unsigned char *Start, const unsigned char *Ptr) { + const unsigned char *Start, const unsigned char *Ptr, + memprof::IndexedVersion Version) { + assert((Version == memprof::Version3 || Version == memprof::Version4) && + "Unsupported version for radix tree format"); // The offset in the stream right before invoking // CallStackTableGenerator.Emit. const uint64_t CallStackPayloadOffset = @@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( const uint64_t RecordTableOffset = support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + uint64_t DataAccessProfOffset = 0; + if (Version == memprof::Version4) + DataAccessProfOffset = + support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + // Read the schema. auto SchemaOr = memprof::readMemProfSchema(Ptr); if (!SchemaOr) @@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( /*Payload=*/Start + RecordPayloadOffset, /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema))); + if (DataAccessProfOffset > RecordTableOffset) { + DataAccessProfileData = + std::make_unique<data_access_prof::DataAccessProfData>(); + const unsigned char *DAPPtr = Start + DataAccessProfOffset; + if (Error E = DataAccessProfileData->deserialize(DAPPtr)) + return E; + } + return Error::success(); } @@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, case memprof::Version3: case memprof::Version4: // V3 and V4 share the same high-level structure (radix tree, linear IDs). - if (Error E = deserializeRadixTreeBased(Start, Ptr)) + if (Error E = deserializeRadixTreeBased(Start, Ptr, Version)) return E; break; } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index e6c83430cd8e9..78aba992fcd65 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1551,6 +1551,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const { Pair.Record = std::move(*Record); AllMemProfData.HeapProfileRecords.push_back(std::move(Pair)); } + // Populate the data access profiles for yaml output. + if (DataAccessProfileData != nullptr) { + for (const auto &[SymHandleRef, RecordRef] : + DataAccessProfileData->getRecords()) + AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back( + data_access_prof::DataAccessProfRecord( + SymHandleRef, RecordRef.AccessCount, RecordRef.Locations)); + for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols()) + AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back( + ColdSymbol.str()); + for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes()) + AllMemProfData.YamlifiedDataAccessProfiles.KnownColdHashes.push_back( + Hash); + } return AllMemProfData; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 2759346935b14..b0012ccc4f4bf 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" @@ -29,6 +30,7 @@ #include "llvm/Support/raw_ostream.h" #include <cstdint> #include <ctime> +#include <functional> #include <memory> #include <string> #include <tuple> @@ -152,9 +154,7 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) { InfoObj->ValueProfDataEndianness = Endianness; } -void InstrProfWriter::setOutputSparse(bool Sparse) { - this->Sparse = Sparse; -} +void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, function_ref<void(Error)> Warn) { @@ -329,6 +329,12 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) { llvm::append_range(BinaryIds, BIs); } +void InstrProfWriter::addDataAccessProfData( + std::unique_ptr<data_access_prof::DataAccessProfData> + DataAccessProfDataIn) { + DataAccessProfileData = std::move(DataAccessProfDataIn); +} + void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength); asser... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/139997 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits