https://github.com/mingmingl-llvm updated 
https://github.com/llvm/llvm-project/pull/139997

>From d15ae3e73589b50edeb2ab823976734eaa3806e5 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmi...@google.com>
Date: Thu, 15 May 2025 09:56:52 -0700
Subject: [PATCH] Support reading and writing data access profiles in memprof
 v4.

---
 .../include/llvm/ProfileData/DataAccessProf.h | 12 +++-
 .../llvm/ProfileData/IndexedMemProfData.h     | 12 +++-
 .../llvm/ProfileData/InstrProfReader.h        |  6 +-
 .../llvm/ProfileData/InstrProfWriter.h        |  6 ++
 llvm/include/llvm/ProfileData/MemProfReader.h | 15 +++++
 llvm/include/llvm/ProfileData/MemProfYAML.h   | 65 +++++++++++++++++++
 llvm/lib/ProfileData/DataAccessProf.cpp       |  6 +-
 llvm/lib/ProfileData/IndexedMemProfData.cpp   | 61 +++++++++++++----
 llvm/lib/ProfileData/InstrProfReader.cpp      | 14 ++++
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 20 ++++--
 llvm/lib/ProfileData/MemProfReader.cpp        | 30 +++++++++
 .../tools/llvm-profdata/memprof-yaml.test     | 11 ++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  4 ++
 .../ProfileData/DataAccessProfTest.cpp        | 11 ++--
 14 files changed, 242 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/DataAccessProf.h 
b/llvm/include/llvm/ProfileData/DataAccessProf.h
index e8504102238d1..f5f6abf0a2817 100644
--- a/llvm/include/llvm/ProfileData/DataAccessProf.h
+++ b/llvm/include/llvm/ProfileData/DataAccessProf.h
@@ -41,6 +41,8 @@ namespace data_access_prof {
 struct SourceLocation {
   SourceLocation(StringRef FileNameRef, uint32_t Line)
       : FileName(FileNameRef.str()), Line(Line) {}
+
+  SourceLocation() {}
   /// The filename where the data is located.
   std::string FileName;
   /// The line number in the source code.
@@ -53,6 +55,8 @@ namespace internal {
 // which strings are owned by `DataAccessProfData`. Used by 
`DataAccessProfData`
 // to represent data locations internally.
 struct SourceLocationRef {
+  SourceLocationRef(StringRef FileNameRef, uint32_t Line)
+      : FileName(FileNameRef), Line(Line) {}
   // The filename where the data is located.
   StringRef FileName;
   // The line number in the source code.
@@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
 /// The data access profiles for a symbol.
 struct DataAccessProfRecord {
 public:
-  DataAccessProfRecord(SymbolHandleRef SymHandleRef,
-                       ArrayRef<internal::SourceLocationRef> LocRefs) {
+  DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
+                       ArrayRef<internal::SourceLocationRef> LocRefs)
+      : AccessCount(AccessCount) {
     if (std::holds_alternative<StringRef>(SymHandleRef)) {
       SymHandle = std::get<StringRef>(SymHandleRef).str();
     } else
@@ -110,8 +115,9 @@ struct DataAccessProfRecord {
     for (auto Loc : LocRefs)
       Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
   }
+  DataAccessProfRecord() {}
   SymbolHandle SymHandle;
-
+  uint64_t AccessCount;
   // The locations of data in the source code. Optional.
   SmallVector<SourceLocation> Locations;
 };
diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h 
b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index 3c6c329d1c49d..66fa38472059b 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -10,14 +10,20 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 
+#include <functional>
+#include <optional>
+
 namespace llvm {
 
 // Write the MemProf data to OS.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema);
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DataAccessProfileData);
 
 } // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h 
b/llvm/include/llvm/ProfileData/InstrProfReader.h
index c250a9ede39bc..210df6be46f04 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -704,9 +705,12 @@ class IndexedMemProfReader {
   // The number of elements in the radix tree array.
   unsigned RadixTreeSize = 0;
 
+  std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
+
   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
   Error deserializeRadixTreeBased(const unsigned char *Start,
-                                  const unsigned char *Ptr);
+                                  const unsigned char *Ptr,
+                                  memprof::IndexedVersion Version);
 
 public:
   IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h 
b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 67d85daa81623..cf1cec25c3cac 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
   // Whether to generated random memprof hotness for testing.
   bool MemprofGenerateRandomHotness;
 
+  std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
+
 public:
   // For memprof testing, random hotness can be assigned to the contexts if
   // MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
   // Add a binary id to the binary ids list.
   void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
 
+  void addDataAccessProfData(
+      std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile);
+
   /// Merge existing function counts from the given writer.
   void mergeRecordsFromWriter(InstrProfWriter &&IPW,
                               function_ref<void(Error)> Warn);
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h 
b/llvm/include/llvm/ProfileData/MemProfReader.h
index 29d9e57cae3e3..02defa189ea7e 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -228,6 +228,21 @@ class YAMLMemProfReader final : public MemProfReader {
   create(std::unique_ptr<MemoryBuffer> Buffer);
 
   void parse(StringRef YAMLData);
+
+  std::unique_ptr<data_access_prof::DataAccessProfData>
+  takeDataAccessProfData() {
+    return std::move(DataAccessProfileData);
+  }
+
+private:
+  // Called by `parse` to set data access profiles after parsing them from Yaml
+  // files.
+  void setDataAccessProfileData(
+      std::unique_ptr<data_access_prof::DataAccessProfData> Data) {
+    DataAccessProfileData = std::move(Data);
+  }
+
+  std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
 };
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h 
b/llvm/include/llvm/ProfileData/MemProfYAML.h
index 08dee253f615a..8db73f4bb5a1b 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -2,6 +2,7 @@
 #define LLVM_PROFILEDATA_MEMPROFYAML_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -12,6 +13,9 @@ namespace memprof {
 // serialized and deserialized in YAML.
 LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64)
 
+LLVM_YAML_STRONG_TYPEDEF(uint64_t, SymbolContentHash)
+LLVM_YAML_STRONG_TYPEDEF(std::string, OwnedSymbolName)
+
 // Helper struct for AllMemProfData.  In YAML, we treat the GUID and the fields
 // within MemProfRecord at the same level as if the GUID were part of
 // MemProfRecord.
@@ -20,9 +24,25 @@ struct GUIDMemProfRecordPair {
   MemProfRecord Record;
 };
 
+// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct
+// members use owned strings. This is for simplicity and assumes that most real
+// world use cases do look-ups and regression test scale is small, so string
+// efficiency is not a priority.
+struct YamlDataAccessProfData {
+  std::vector<data_access_prof::DataAccessProfRecord> Records;
+  std::vector<uint64_t> KnownColdHashes;
+  std::vector<std::string> KnownColdSymbols;
+
+  bool isEmpty() const {
+    return Records.empty() && KnownColdHashes.empty() &&
+           KnownColdSymbols.empty();
+  }
+};
+
 // The top-level data structure, only used with YAML for now.
 struct AllMemProfData {
   std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+  YamlDataAccessProfData YamlifiedDataAccessProfiles;
 };
 } // namespace memprof
 
@@ -206,9 +226,50 @@ template <> struct 
MappingTraits<memprof::GUIDMemProfRecordPair> {
   }
 };
 
+template <> struct MappingTraits<data_access_prof::SourceLocation> {
+  static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) {
+    Io.mapOptional("FileName", Loc.FileName);
+    Io.mapOptional("Line", Loc.Line);
+  }
+};
+
+template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> {
+  static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) {
+    if (Io.outputting()) {
+      if (std::holds_alternative<std::string>(Rec.SymHandle)) {
+        Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
+      } else {
+        Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
+      }
+    } else {
+      std::string SymName;
+      uint64_t Hash = 0;
+      Io.mapOptional("Symbol", SymName);
+      Io.mapOptional("Hash", Hash);
+      if (!SymName.empty()) {
+        Rec.SymHandle = SymName;
+      } else {
+        Rec.SymHandle = Hash;
+      }
+    }
+
+    Io.mapOptional("Locations", Rec.Locations);
+  }
+};
+
+template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
+  static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
+    Io.mapOptional("SampledRecords", Data.Records);
+    Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
+    Io.mapOptional("KnownColdHashes", Data.KnownColdHashes);
+  }
+};
+
 template <> struct MappingTraits<memprof::AllMemProfData> {
   static void mapping(IO &Io, memprof::AllMemProfData &Data) {
     Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+
+    Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
   }
 };
 
@@ -234,5 +295,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SymbolContentHash)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::OwnedSymbolName)
 
 #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp 
b/llvm/lib/ProfileData/DataAccessProf.cpp
index c5d0099977cfa..61a73fab7269f 100644
--- a/llvm/lib/ProfileData/DataAccessProf.cpp
+++ b/llvm/lib/ProfileData/DataAccessProf.cpp
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef 
SymbolID) const {
 
   auto It = Records.find(Key);
   if (It != Records.end()) {
-    return DataAccessProfRecord(Key, It->second.Locations);
+    return DataAccessProfRecord(Key, It->second.AccessCount,
+                                It->second.Locations);
   }
 
   return std::nullopt;
@@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples(
   auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
   if (!CanonicalName)
     return CanonicalName.takeError();
-  KnownColdSymbols.insert(*CanonicalName);
+  KnownColdSymbols.insert(
+      saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
   return Error::success();
 }
 
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp 
b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 3d20f7a7a5778..cc1b03101c880 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -10,6 +10,7 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS,
 
 static Error writeMemProfRadixTreeBased(
     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-    memprof::IndexedVersion Version, bool MemProfFullSchema) {
+    memprof::IndexedVersion Version, bool MemProfFullSchema,
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DataAccessProfileData) {
   assert((Version == memprof::Version3 || Version == memprof::Version4) &&
          "Unsupported version for radix tree format");
 
@@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased(
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof record payload offset.
   OS.write(0ULL); // Reserve space for the memprof record table offset.
+  if (Version == memprof::Version4)
+    OS.write(0ULL); // Reserve space for the data access profile offset.
 
   auto Schema = memprof::getHotColdSchema();
   if (MemProfFullSchema)
@@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased(
   uint64_t RecordTableOffset = writeMemProfRecords(
       OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (DataAccessProfileData.has_value()) {
+    DataAccessProfOffset = OS.tell();
+    if (Error E = (*DataAccessProfileData).get().serialize(OS))
+      return E;
+  }
+
   // Verify that the computation for the number of elements in the call stack
   // array works.
   assert(CallStackPayloadOffset +
              NumElements * sizeof(memprof::LinearFrameId) ==
          RecordPayloadOffset);
 
-  uint64_t Header[] = {
+  SmallVector<uint64_t, 4> Header = {
       CallStackPayloadOffset,
       RecordPayloadOffset,
       RecordTableOffset,
   };
+  if (Version == memprof::Version4)
+    Header.push_back(DataAccessProfOffset);
   OS.patch({{HeaderUpdatePos, Header}});
 
   return Error::success();
@@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS,
                             memprof::IndexedMemProfData &MemProfData,
                             bool MemProfFullSchema) {
   return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
-                                    MemProfFullSchema);
+                                    MemProfFullSchema, std::nullopt);
 }
 
 // Write out MemProf Version4
-static Error writeMemProfV4(ProfOStream &OS,
-                            memprof::IndexedMemProfData &MemProfData,
-                            bool MemProfFullSchema) {
+static Error writeMemProfV4(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    bool MemProfFullSchema,
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DataAccessProfileData) {
   return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
-                                    MemProfFullSchema);
+                                    MemProfFullSchema, DataAccessProfileData);
 }
 
 // Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema) {
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DataAccessProfileData) {
   switch (MemProfVersionRequested) {
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
   case memprof::Version3:
     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
   case memprof::Version4:
-    return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
+    return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
+                          DataAccessProfileData);
   }
 
   return make_error<InstrProfError>(
@@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned 
char *Start,
 }
 
 Error IndexedMemProfReader::deserializeRadixTreeBased(
-    const unsigned char *Start, const unsigned char *Ptr) {
+    const unsigned char *Start, const unsigned char *Ptr,
+    memprof::IndexedVersion Version) {
+  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+         "Unsupported version for radix tree format");
   // The offset in the stream right before invoking
   // CallStackTableGenerator.Emit.
   const uint64_t CallStackPayloadOffset =
@@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
   const uint64_t RecordTableOffset =
       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (Version == memprof::Version4)
+    DataAccessProfOffset =
+        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
   // Read the schema.
   auto SchemaOr = memprof::readMemProfSchema(Ptr);
   if (!SchemaOr)
@@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
       /*Payload=*/Start + RecordPayloadOffset,
       /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
 
+  if (DataAccessProfOffset > RecordTableOffset) {
+    DataAccessProfileData =
+        std::make_unique<data_access_prof::DataAccessProfData>();
+    const unsigned char *DAPPtr = Start + DataAccessProfOffset;
+    if (Error E = DataAccessProfileData->deserialize(DAPPtr))
+      return E;
+  }
+
   return Error::success();
 }
 
@@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char 
*Start,
   case memprof::Version3:
   case memprof::Version4:
     // V3 and V4 share the same high-level structure (radix tree, linear IDs).
-    if (Error E = deserializeRadixTreeBased(Start, Ptr))
+    if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
       return E;
     break;
   }
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp 
b/llvm/lib/ProfileData/InstrProfReader.cpp
index e6c83430cd8e9..78aba992fcd65 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1551,6 +1551,20 @@ memprof::AllMemProfData 
IndexedMemProfReader::getAllMemProfData() const {
     Pair.Record = std::move(*Record);
     AllMemProfData.HeapProfileRecords.push_back(std::move(Pair));
   }
+  // Populate the data access profiles for yaml output.
+  if (DataAccessProfileData != nullptr) {
+    for (const auto &[SymHandleRef, RecordRef] :
+         DataAccessProfileData->getRecords())
+      AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
+          data_access_prof::DataAccessProfRecord(
+              SymHandleRef, RecordRef.AccessCount, RecordRef.Locations));
+    for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
+          ColdSymbol.str());
+    for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdHashes.push_back(
+          Hash);
+  }
   return AllMemProfData;
 }
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp 
b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2759346935b14..b0012ccc4f4bf 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/IndexedMemProfData.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -29,6 +30,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 #include <ctime>
+#include <functional>
 #include <memory>
 #include <string>
 #include <tuple>
@@ -152,9 +154,7 @@ void 
InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
   InfoObj->ValueProfDataEndianness = Endianness;
 }
 
-void InstrProfWriter::setOutputSparse(bool Sparse) {
-  this->Sparse = Sparse;
-}
+void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; }
 
 void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                 function_ref<void(Error)> Warn) {
@@ -329,6 +329,12 @@ void 
InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
   llvm::append_range(BinaryIds, BIs);
 }
 
+void InstrProfWriter::addDataAccessProfData(
+    std::unique_ptr<data_access_prof::DataAccessProfData>
+        DataAccessProfDataIn) {
+  DataAccessProfileData = std::move(DataAccessProfDataIn);
+}
+
 void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
   assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
   assert(!Trace.FunctionNameRefs.empty());
@@ -614,8 +620,14 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t MemProfSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
     MemProfSectionStart = OS.tell();
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DAP = std::nullopt;
+    if (DataAccessProfileData.get() != nullptr)
+      DAP = std::ref(*DataAccessProfileData.get());
+
     if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
-                              MemProfFullSchema))
+                              MemProfFullSchema, DAP))
+
       return E;
   }
 
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp 
b/llvm/lib/ProfileData/MemProfReader.cpp
index e0f280b9eb2f6..7fe5145091789 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 
@@ -823,6 +824,35 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
 
     MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
   }
+
+  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
+    return;
+
+  auto ToSymHandleRef = [](data_access_prof::SymbolHandle Handle)
+      -> data_access_prof::SymbolHandleRef {
+    if (std::holds_alternative<std::string>(Handle))
+      return StringRef(std::get<std::string>(Handle));
+    return std::get<uint64_t>(Handle);
+  };
+
+  auto DataAccessProfileData =
+      std::make_unique<data_access_prof::DataAccessProfData>();
+  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
+    if (Error E = DataAccessProfileData->setDataAccessProfile(
+            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
+            Record.Locations))
+      reportFatalInternalError(std::move(E));
+
+  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdHashes)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
+      reportFatalInternalError(std::move(E));
+
+  for (const std::string &Sym :
+       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
+      reportFatalInternalError(std::move(E));
+
+  setDataAccessProfileData(std::move(DataAccessProfileData));
 }
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test 
b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 9766cc50f37d7..5e0c7fb3ea1d8 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -35,4 +35,15 @@ HeapProfileRecords:
         - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, 
IsInlineFrame: true }
         - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, 
IsInlineFrame: false }
         CalleeGuids: [ 0x300 ]
+DataAccessProfiles:
+  SampledRecords:
+    - Symbol:          abcde
+    - Hash:            101010
+      Locations:
+        - FileName:        file
+          Line:            233
+  KnownColdSymbols:
+    - foo
+    - bar
+  KnownColdHashes: [ 999, 1001 ]
 ...
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp 
b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 885e06df6c390..8660eed6be2bf 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Debuginfod/HTTPClient.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
@@ -756,6 +757,8 @@ loadInput(const WeightedFile &Input, SymbolRemapper 
*Remapper,
 
     auto MemProfData = Reader->takeMemProfData();
 
+    auto DataAccessProfData = Reader->takeDataAccessProfData();
+
     // Check for the empty input in case the YAML file is invalid.
     if (MemProfData.Records.empty()) {
       WC->Errors.emplace_back(
@@ -764,6 +767,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper 
*Remapper,
     }
 
     WC->Writer.addMemProfData(std::move(MemProfData), MemProfError);
+    WC->Writer.addDataAccessProfData(std::move(DataAccessProfData));
     return;
   }
 
diff --git a/llvm/unittests/ProfileData/DataAccessProfTest.cpp 
b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
index 127230d4805e7..084a8e96cdafe 100644
--- a/llvm/unittests/ProfileData/DataAccessProfTest.cpp
+++ b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
@@ -78,7 +78,7 @@ TEST(MemProf, DataAccessProfile) {
     // Test that symbol names and file names are stored in the input order.
     EXPECT_THAT(
         llvm::to_vector(llvm::make_first_range(Data.getStrToIndexMapRef())),
-        ElementsAre("foo", "bar.__uniq.321", "file2", "file1"));
+        ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", 
"file1"));
     EXPECT_THAT(Data.getKnownColdSymbols(), ElementsAre("sym2", "sym1"));
     EXPECT_THAT(Data.getKnownColdHashes(), ElementsAre(789, 678));
 
@@ -134,9 +134,10 @@ TEST(MemProf, DataAccessProfile) {
                 testing::IsEmpty());
     EXPECT_FALSE(deserializedData.deserialize(p));
 
-    EXPECT_THAT(llvm::to_vector(llvm::make_first_range(
-                    deserializedData.getStrToIndexMapRef())),
-                ElementsAre("foo", "bar.__uniq.321", "file2", "file1"));
+    EXPECT_THAT(
+        llvm::to_vector(
+            llvm::make_first_range(deserializedData.getStrToIndexMapRef())),
+        ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", 
"file1"));
     EXPECT_THAT(deserializedData.getKnownColdSymbols(),
                 ElementsAre("sym2", "sym1"));
     EXPECT_THAT(deserializedData.getKnownColdHashes(), ElementsAre(789, 678));
@@ -158,7 +159,7 @@ TEST(MemProf, DataAccessProfile) {
                 Field(&DataAccessProfRecordRef::AccessCount, 100),
                 Field(&DataAccessProfRecordRef::IsStringLiteral, false),
                 Field(&DataAccessProfRecordRef::Locations, 
testing::IsEmpty())),
-            AllOf(Field(&DataAccessProfRecordRef::SymbolID, 1),
+            AllOf(Field(&DataAccessProfRecordRef::SymbolID, 2),
                   Field(&DataAccessProfRecordRef::AccessCount, 123),
                   Field(&DataAccessProfRecordRef::IsStringLiteral, false),
                   Field(&DataAccessProfRecordRef::Locations,

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to