https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/98125
>From cf32a43e7c2b04079c6123fe13df4fb7226d771f Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Tue, 9 Jul 2024 10:04:25 -0700 Subject: [PATCH 1/2] Comments Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 69ea0899c5f2c..6753337c24ea7 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -501,7 +501,6 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { // Maps binary functions to adjacent functions in the FCG. for (const BinaryFunction *CallerBF : BFs) { - // Add all call targets to the hash map. for (const BinaryBasicBlock &BB : CallerBF->blocks()) { for (const MCInst &Inst : BB) { if (!BC.MIB->isCall(Instr)) @@ -533,7 +532,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { } } - // Create mapping from neighbor hash to BFs. + // Using the constructed adjacent function mapping, creates mapping from + // neighbor hash to BFs. std::unordered_map<uint64_t, std::vector<const BinaryFunction *>> NeighborHashToBFs; for (const BinaryFunction *BF : BFs) { @@ -552,12 +552,12 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { .push_back(BF); } - // TODO: change call anchor PR to have this representation - we need it here + // TODO: note, this will be introduced in the matching functions with calls + // as anchors pr DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF> IdToYAMLBF; - // TODO: change call anchor PR to have this representation - we need it here - // Maps hashes to profiled functions. + // Maps YAML functions to adjacent functions in the profile FCG. std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF, FunctionHashes> YamlBFToHashes(BFs.size()); @@ -590,7 +590,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { } } - // Matching YAMLBF with neighbor hashes. + // Matches YAMLBF to BFs with neighbor hashes. for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; >From ee9049fc4bd3d4203c19c9c0982a78ab3b47666f Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Tue, 9 Jul 2024 13:52:05 -0700 Subject: [PATCH 2/2] Moved blended hash definition Created using spr 1.3.4 --- bolt/include/bolt/Profile/YAMLProfileReader.h | 69 ++++++++++- bolt/lib/Profile/StaleProfileMatching.cpp | 65 ----------- bolt/lib/Profile/YAMLProfileReader.cpp | 110 ++++++++---------- 3 files changed, 119 insertions(+), 125 deletions(-) diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 36e8f8739eee1..e8a34ecad9a08 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -16,6 +16,73 @@ namespace llvm { namespace bolt { +/// An object wrapping several components of a basic block hash. The combined +/// (blended) hash is represented and stored as one uint64_t, while individual +/// components are of smaller size (e.g., uint16_t or uint8_t). +struct BlendedBlockHash { +private: + using ValueOffset = Bitfield::Element<uint16_t, 0, 16>; + using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>; + using ValueInstr = Bitfield::Element<uint16_t, 32, 16>; + using ValuePred = Bitfield::Element<uint8_t, 48, 8>; + using ValueSucc = Bitfield::Element<uint8_t, 56, 8>; + +public: + explicit BlendedBlockHash() {} + + explicit BlendedBlockHash(uint64_t Hash) { + Offset = Bitfield::get<ValueOffset>(Hash); + OpcodeHash = Bitfield::get<ValueOpcode>(Hash); + InstrHash = Bitfield::get<ValueInstr>(Hash); + PredHash = Bitfield::get<ValuePred>(Hash); + SuccHash = Bitfield::get<ValueSucc>(Hash); + } + + /// Combine the blended hash into uint64_t. + uint64_t combine() const { + uint64_t Hash = 0; + Bitfield::set<ValueOffset>(Hash, Offset); + Bitfield::set<ValueOpcode>(Hash, OpcodeHash); + Bitfield::set<ValueInstr>(Hash, InstrHash); + Bitfield::set<ValuePred>(Hash, PredHash); + Bitfield::set<ValueSucc>(Hash, SuccHash); + return Hash; + } + + /// Compute a distance between two given blended hashes. The smaller the + /// distance, the more similar two blocks are. For identical basic blocks, + /// the distance is zero. + uint64_t distance(const BlendedBlockHash &BBH) const { + assert(OpcodeHash == BBH.OpcodeHash && + "incorrect blended hash distance computation"); + uint64_t Dist = 0; + // Account for NeighborHash + Dist += SuccHash == BBH.SuccHash ? 0 : 1; + Dist += PredHash == BBH.PredHash ? 0 : 1; + Dist <<= 16; + // Account for InstrHash + Dist += InstrHash == BBH.InstrHash ? 0 : 1; + Dist <<= 16; + // Account for Offset + Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset); + return Dist; + } + + /// The offset of the basic block from the function start. + uint16_t Offset{0}; + /// (Loose) Hash of the basic block instructions, excluding operands. + uint16_t OpcodeHash{0}; + /// (Strong) Hash of the basic block instructions, including opcodes and + /// operands. + uint16_t InstrHash{0}; + /// (Loose) Hashes of the predecessors of the basic block. + uint8_t PredHash{0}; + /// (Loose) Hashes of the successors of the basic block. + uint8_t SuccHash{0}; +}; + +struct CallGraphMatcher {}; + class YAMLProfileReader : public ProfileReaderBase { public: explicit YAMLProfileReader(StringRef Filename) @@ -92,7 +159,7 @@ class YAMLProfileReader : public ProfileReaderBase { /// Matches functions using exact hash. size_t matchWithHash(BinaryContext &BC); - + /// Matches functions using the call graph. size_t matchWithCallGraph(BinaryContext &BC); diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index e473beb2fad8c..9631ac176554e 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -121,71 +121,6 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc( namespace llvm { namespace bolt { -/// An object wrapping several components of a basic block hash. The combined -/// (blended) hash is represented and stored as one uint64_t, while individual -/// components are of smaller size (e.g., uint16_t or uint8_t). -struct BlendedBlockHash { -private: - using ValueOffset = Bitfield::Element<uint16_t, 0, 16>; - using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>; - using ValueInstr = Bitfield::Element<uint16_t, 32, 16>; - using ValuePred = Bitfield::Element<uint8_t, 48, 8>; - using ValueSucc = Bitfield::Element<uint8_t, 56, 8>; - -public: - explicit BlendedBlockHash() {} - - explicit BlendedBlockHash(uint64_t Hash) { - Offset = Bitfield::get<ValueOffset>(Hash); - OpcodeHash = Bitfield::get<ValueOpcode>(Hash); - InstrHash = Bitfield::get<ValueInstr>(Hash); - PredHash = Bitfield::get<ValuePred>(Hash); - SuccHash = Bitfield::get<ValueSucc>(Hash); - } - - /// Combine the blended hash into uint64_t. - uint64_t combine() const { - uint64_t Hash = 0; - Bitfield::set<ValueOffset>(Hash, Offset); - Bitfield::set<ValueOpcode>(Hash, OpcodeHash); - Bitfield::set<ValueInstr>(Hash, InstrHash); - Bitfield::set<ValuePred>(Hash, PredHash); - Bitfield::set<ValueSucc>(Hash, SuccHash); - return Hash; - } - - /// Compute a distance between two given blended hashes. The smaller the - /// distance, the more similar two blocks are. For identical basic blocks, - /// the distance is zero. - uint64_t distance(const BlendedBlockHash &BBH) const { - assert(OpcodeHash == BBH.OpcodeHash && - "incorrect blended hash distance computation"); - uint64_t Dist = 0; - // Account for NeighborHash - Dist += SuccHash == BBH.SuccHash ? 0 : 1; - Dist += PredHash == BBH.PredHash ? 0 : 1; - Dist <<= 16; - // Account for InstrHash - Dist += InstrHash == BBH.InstrHash ? 0 : 1; - Dist <<= 16; - // Account for Offset - Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset); - return Dist; - } - - /// The offset of the basic block from the function start. - uint16_t Offset{0}; - /// (Loose) Hash of the basic block instructions, excluding operands. - uint16_t OpcodeHash{0}; - /// (Strong) Hash of the basic block instructions, including opcodes and - /// operands. - uint16_t InstrHash{0}; - /// (Loose) Hashes of the predecessors of the basic block. - uint8_t PredHash{0}; - /// (Loose) Hashes of the successors of the basic block. - uint8_t SuccHash{0}; -}; - /// The object is used to identify and match basic blocks in a BinaryFunction /// given their hashes computed on a binary built from several revisions behind /// release. diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 6753337c24ea7..2cab5c385c3c9 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -458,58 +458,53 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { struct FunctionHashes { uint64_t Hash{0}; uint64_t AdjacentFunctionHash{0}; - std::set<uint64_t> AdjacentFunctionHashesSet; + std::vector<uint64_t> AdjacentFunctionHashesSet; }; size_t MatchedWithCallGraph = 0; std::vector<BinaryFunction *> BFs = BC.getAllBinaryFunctions(); - std::unordered_map<const BinaryFunction *, FunctionHashes> BFToHashes( - BFs.size()); + std::unordered_map<BinaryFunction *, FunctionHashes> BFToHashes(BFs.size()); // Computes the loose hash, as in the opcode hash, of a binary function. auto ComputeBFLooseHash = [&](const BinaryFunction *BF) { - std::string FunctionHashStr = std::accumulate( - BF->begin(), BF->end(), std::string(""), - [&](std::string Accum, const BinaryBasicBlock &BB) { - std::string BlockHashStr = hashBlockLoose(BC, BB); - uint16_t OpcodeHash; - if (YamlBP.Header.HashFunction == HashFunction::StdHash) - OpcodeHash = - (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr)); - else if (YamlBP.Header.HashFunction == HashFunction::XXH3) - OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr); - else - llvm_unreachable("Unsupported hash function"); - return std::move(Accum) + std::to_string(OpcodeHash); - }); - + std::string FunctionHashStr; + for (const BinaryBasicBlock &BB : *BF) { + std::string BlockHashStr = hashBlockLoose(BC, BB); + uint16_t OpcodeHash; + if (YamlBP.Header.HashFunction == HashFunction::StdHash) + OpcodeHash = + (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr)); + else if (YamlBP.Header.HashFunction == HashFunction::XXH3) + OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr); + else + llvm_unreachable("Unsupported hash function"); + FunctionHashStr.append(std::to_string(OpcodeHash)); + } return std::hash<std::string>{}(FunctionHashStr); }; // Computes the loose hash of a function profile. auto ComputeYamlBFLooseHash = [&](const yaml::bolt::BinaryFunctionProfile &YamlBF) { - std::string HashStr = std::to_string( - YamlBF.Blocks.begin(), YamlBF.Blocks.end(), std::string(""), - [&](std::string Accum, - const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { - BlendedBlockHash YamlHash(YamlBB.Hash); - return std::move(Accum) + std::to_string(YamlHash.OpcodeHash); - }); - + std::string HashStr; + for (auto &YamlBB : YamlBF.Blocks) { + BlendedBlockHash YamlHash(YamlBB.Hash); + HashStr.append(std::to_string(YamlHash.OpcodeHash)); + } return std::hash<std::string>{}(HashStr); }; // Maps binary functions to adjacent functions in the FCG. - for (const BinaryFunction *CallerBF : BFs) { + for (BinaryFunction *CallerBF : BFs) { for (const BinaryBasicBlock &BB : CallerBF->blocks()) { - for (const MCInst &Inst : BB) { + for (const MCInst &Instr : BB) { if (!BC.MIB->isCall(Instr)) continue; const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); if (!CallSymbol) continue; - BinaryData *BD = - BC.getBinaryDataByName(CallSymbol->getName()) if (!BD) continue; + BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName()); + if (!BD) + continue; BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol()); if (!CalleeBF) continue; @@ -524,9 +519,9 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { CallerBFIt->second = FunctionHashes(); CallerBFIt->second.Hash = ComputeBFLooseHash(CallerBF); } - CalleeBFIt->second.AdjacentFunctionHashesSet.insert( + CalleeBFIt->second.AdjacentFunctionHashesSet.push_back( CallerBFIt->second.Hash); - CallerBFIt->second.AdjacentFunctionHashesSet.insert( + CallerBFIt->second.AdjacentFunctionHashesSet.push_back( CalleeBFIt->second.Hash); } } @@ -534,13 +529,13 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { // Using the constructed adjacent function mapping, creates mapping from // neighbor hash to BFs. - std::unordered_map<uint64_t, std::vector<const BinaryFunction *>> - NeighborHashToBFs; - for (const BinaryFunction *BF : BFs) { + std::unordered_map<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs; + for (BinaryFunction *BF : BFs) { auto It = BFToHashes.find(BF); assert(It != BFToHashes.end() && "All BFs should be processed"); FunctionHashes &FunctionHashes = It->second; - + std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(), + FunctionHashes.AdjacentFunctionHashesSet.end()); std::string AdjacentFunctionHashStr = std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(), FunctionHashes.AdjacentFunctionHashesSet.end(), @@ -554,38 +549,34 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { // TODO: note, this will be introduced in the matching functions with calls // as anchors pr - DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF> - IdToYAMLBF; + DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile *> IdToYAMLBF; // Maps YAML functions to adjacent functions in the profile FCG. - std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF, - FunctionHashes> + std::unordered_map<const yaml::bolt::BinaryFunctionProfile *, FunctionHashes> YamlBFToHashes(BFs.size()); for (const auto &CallerYamlBF : YamlBP.Functions) { - if (YamlBF.Used) - continue; - for (const auto &YamlBB : CallerYamlBF.BasicBlocks) { - for (const auto &CallSite : YamlBB.Callsites) { + for (const auto &YamlBB : CallerYamlBF.Blocks) { + for (const auto &CallSite : YamlBB.CallSites) { auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId); if (IdToYAMLBFIt == IdToYAMLBF.end()) continue; - auto CalleeBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second); - if (CalleeBFIt == YamlBFToHashes.end()) { - CalleeBFIt->second = FunctionHashes(); - CalleeBFIt->second.Hash = + auto CalleeYamlBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second); + if (CalleeYamlBFIt == YamlBFToHashes.end()) { + CalleeYamlBFIt->second = FunctionHashes(); + CalleeYamlBFIt->second.Hash = ComputeYamlBFLooseHash(*IdToYAMLBFIt->second); } - auto CallerBFIt = YamlBFToHashes.find(&CallerYamlBF); - if (CallerBFIt == YamlBFToHashes.end()) { - CallerBFIt->second = FunctionHashes(); - CallerBFIt->second.Hash = ComputeYamlBFLooseHash(CallerBF); + auto CallerYamlBFIt = YamlBFToHashes.find(&CallerYamlBF); + if (CallerYamlBFIt == YamlBFToHashes.end()) { + CallerYamlBFIt->second = FunctionHashes(); + CallerYamlBFIt->second.Hash = ComputeYamlBFLooseHash(CallerYamlBF); } - CalleeBFIt->second.AdjacentFunctionHashesSet.insert( - CallerBFIt->second.Hash); - CallerBFIt->second.AdjacentFunctionHashesSet.insert( - CalleeBFIt->second.Hash); + CalleeYamlBFIt->second.AdjacentFunctionHashesSet.push_back( + CallerYamlBFIt->second.Hash); + CallerYamlBFIt->second.AdjacentFunctionHashesSet.push_back( + CalleeYamlBFIt->second.Hash); } } } @@ -599,7 +590,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { assert(It != YamlBFToHashes.end() && "All unused functions should be processed"); FunctionHashes &FunctionHashes = It->second; - + std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(), + FunctionHashes.AdjacentFunctionHashesSet.end()); std::string AdjacentFunctionHashStr = std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(), FunctionHashes.AdjacentFunctionHashesSet.end(), @@ -614,7 +606,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { if (NeighborHashToBFsIt == NeighborHashToBFs.end()) continue; - for (const BinaryFunction *BF : NeighborHashToBFsIt->second) { + for (BinaryFunction *BF : NeighborHashToBFsIt->second) { if (!ProfiledFunctions.count(BF) && profileMatches(YamlBF, *BF)) { matchProfileToFunction(YamlBF, *BF); ++MatchedWithCallGraph; @@ -668,7 +660,7 @@ size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { // Maps namespaces to BFs excluding binary functions with no equal sized // profiled functions belonging to the same namespace. - / for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { std::string DemangledName = BF->getDemangledName(); std::string Namespace = DeriveNameSpace(DemangledName); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits