llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> To be used for pseudo probe function matching (#<!-- -->100446). --- Full diff: https://github.com/llvm/llvm-project/pull/107137.diff 3 Files Affected: - (modified) bolt/include/bolt/Profile/ProfileYAMLMapping.h (+38-11) - (modified) bolt/lib/Profile/DataAggregator.cpp (+46-7) - (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+49-10) ``````````diff diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 2a0514d7d9304b..f0cc116ebc6cb0 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -95,24 +95,28 @@ template <> struct MappingTraits<bolt::SuccessorInfo> { namespace bolt { struct PseudoProbeInfo { - llvm::yaml::Hex64 GUID; uint64_t Index; + uint32_t InlineTreeIndex; + llvm::yaml::Hex32 Offset{0}; uint8_t Type; bool operator==(const PseudoProbeInfo &Other) const { - return GUID == Other.GUID && Index == Other.Index; + return InlineTreeIndex == Other.InlineTreeIndex && Index == Other.Index; } - bool operator!=(const PseudoProbeInfo &Other) const { - return !(*this == Other); + bool operator<(const PseudoProbeInfo &Other) const { + if (InlineTreeIndex == Other.InlineTreeIndex) + return Index < Other.Index; + return InlineTreeIndex < Other.InlineTreeIndex; } }; } // end namespace bolt template <> struct MappingTraits<bolt::PseudoProbeInfo> { static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) { - YamlIO.mapRequired("guid", PI.GUID); YamlIO.mapRequired("id", PI.Index); YamlIO.mapRequired("type", PI.Type); + YamlIO.mapOptional("inline_tree_id", PI.InlineTreeIndex, (uint32_t)0); + YamlIO.mapOptional("offset", PI.Offset, (uint32_t)0); } static const bool flow = true; @@ -122,7 +126,7 @@ template <> struct MappingTraits<bolt::PseudoProbeInfo> { LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo) LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo) namespace llvm { namespace yaml { @@ -163,10 +167,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> { } }; +namespace bolt { +struct InlineTreeInfo { + uint32_t Index; + uint32_t ParentIndex; + uint32_t CallSiteProbe; + llvm::yaml::Hex64 GUID; + llvm::yaml::Hex64 Hash; + bool operator==(const InlineTreeInfo &Other) const { + return Index == Other.Index; + } +}; +} // end namespace bolt + +template <> struct MappingTraits<bolt::InlineTreeInfo> { + static void mapping(IO &YamlIO, bolt::InlineTreeInfo &ITI) { + YamlIO.mapRequired("guid", ITI.GUID); + YamlIO.mapRequired("hash", ITI.Hash); + YamlIO.mapRequired("id", ITI.Index); + YamlIO.mapOptional("parent", ITI.ParentIndex, (uint32_t)0); + YamlIO.mapOptional("callsite", ITI.CallSiteProbe, 0); + } + + static const bool flow = true; +}; } // end namespace yaml } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo) namespace llvm { namespace yaml { @@ -179,8 +208,7 @@ struct BinaryFunctionProfile { llvm::yaml::Hex64 Hash{0}; uint64_t ExecCount{0}; std::vector<BinaryBasicBlockProfile> Blocks; - llvm::yaml::Hex64 GUID{0}; - llvm::yaml::Hex64 PseudoProbeDescHash{0}; + std::vector<InlineTreeInfo> InlineTree; bool Used{false}; }; } // end namespace bolt @@ -194,9 +222,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> { YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks); YamlIO.mapOptional("blocks", BFP.Blocks, std::vector<bolt::BinaryBasicBlockProfile>()); - YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0); - YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash, - (uint64_t)0); + YamlIO.mapOptional("inline_tree", BFP.InlineTree, + std::vector<bolt::InlineTreeInfo>()); } }; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 10d745cc69824b..803cc4725b5702 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/raw_ostream.h" #include <map> #include <optional> +#include <queue> #include <unordered_map> #include <utility> @@ -2402,12 +2403,43 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset); YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches; } - if (PseudoProbeDecoder) { - if ((YamlBF.GUID = BF->getGUID())) { - const MCPseudoProbeFuncDesc *FuncDesc = - PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID); - YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash; + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> + InlineTreeNodeId; + if (PseudoProbeDecoder && BF->getGUID()) { + std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist; + // FIXME: faster inline tree lookup by top-level GUID + if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if( + PseudoProbeDecoder->getDummyInlineRoot().getChildren(), + [&](const auto &InlineTree) { + return InlineTree.Guid == BF->getGUID(); + })) { + ITWorklist.push(InlineTree); + InlineTreeNodeId[InlineTree] = 0; + auto Hash = + PseudoProbeDecoder->getFuncDescForGUID(BF->getGUID())->FuncHash; + YamlBF.InlineTree.emplace_back( + yaml::bolt::InlineTreeInfo{0, 0, 0, BF->getGUID(), Hash}); + } + uint32_t ParentId = 0; + uint32_t NodeId = 1; + while (!ITWorklist.empty()) { + const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front(); + for (const MCDecodedPseudoProbeInlineTree &Child : + Cur->getChildren()) { + InlineTreeNodeId[&Child] = NodeId; + auto Hash = + PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash; + YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{ + NodeId++, ParentId, std::get<1>(Child.getInlineSite()), + Child.Guid, Hash}); + ITWorklist.push(&Child); + } + ITWorklist.pop(); + ++ParentId; } + } + + if (PseudoProbeDecoder) { // Fetch probes belonging to all fragments const AddressProbesMap &ProbeMap = PseudoProbeDecoder->getAddress2ProbesMap(); @@ -2420,12 +2452,19 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); - const unsigned BlockIndex = getBlock(InputOffset).second; + const auto [BlockOffset, BlockIndex] = getBlock(InputOffset); + uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()]; + uint32_t Offset = InputOffset - BlockOffset; YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), + yaml::bolt::PseudoProbeInfo{Probe.getIndex(), NodeId, Offset, Probe.getType()}); } } + for (yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + llvm::sort(YamlBB.PseudoProbes); + YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes), + YamlBB.PseudoProbes.end()); + } } // Drop blocks without a hash, won't be useful for stale matching. llvm::erase_if(YamlBF.Blocks, diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index ffbf2388e912fb..817689230e2a70 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -12,11 +12,15 @@ #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/DataAggregator.h" #include "bolt/Profile/ProfileReaderBase.h" +#include "bolt/Profile/ProfileYAMLMapping.h" #include "bolt/Rewrite/RewriteInstance.h" #include "bolt/Utils/CommandLineOpts.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" +#include <deque> +#include <queue> #undef DEBUG_TYPE #define DEBUG_TYPE "bolt-prof" @@ -77,13 +81,6 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, YamlBF.Hash = BF.getHash(); YamlBF.NumBasicBlocks = BF.size(); YamlBF.ExecCount = BF.getKnownExecutionCount(); - if (PseudoProbeDecoder) { - if ((YamlBF.GUID = BF.getGUID())) { - const MCPseudoProbeFuncDesc *FuncDesc = - PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID); - YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash; - } - } BinaryFunction::BasicBlockOrderType Order; llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(), @@ -92,6 +89,40 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const FunctionLayout Layout = BF.getLayout(); Layout.updateLayoutIndices(Order); + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; + if (PseudoProbeDecoder && BF.getGUID()) { + std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist; + // FIXME: faster inline tree lookup by top-level GUID + if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if( + PseudoProbeDecoder->getDummyInlineRoot().getChildren(), + [&](const auto &InlineTree) { + return InlineTree.Guid == BF.getGUID(); + })) { + ITWorklist.push(InlineTree); + InlineTreeNodeId[InlineTree] = 0; + auto Hash = + PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash; + YamlBF.InlineTree.emplace_back( + yaml::bolt::InlineTreeInfo{0, 0, 0, BF.getGUID(), Hash}); + } + uint32_t ParentId = 0; + uint32_t NodeId = 1; + while (!ITWorklist.empty()) { + const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front(); + for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) { + InlineTreeNodeId[&Child] = NodeId; + auto Hash = + PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash; + YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{ + NodeId++, ParentId, std::get<1>(Child.getInlineSite()), Child.Guid, + Hash}); + ITWorklist.push(&Child); + } + ITWorklist.pop(); + ++ParentId; + } + } + for (const BinaryBasicBlock *BB : Order) { yaml::bolt::BinaryBasicBlockProfile YamlBB; YamlBB.Index = BB->getLayoutIndex(); @@ -198,10 +229,18 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const uint64_t FuncAddr = BF.getAddress(); const std::pair<uint64_t, uint64_t> &BlockRange = BB->getInputAddressRange(); - for (const MCDecodedPseudoProbe &Probe : ProbeMap.find( - FuncAddr + BlockRange.first, FuncAddr + BlockRange.second)) + const std::pair<uint64_t, uint64_t> BlockAddrRange = { + FuncAddr + BlockRange.first, FuncAddr + BlockRange.second}; + for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second)) { + uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()]; + uint32_t Offset = Probe.getAddress() - BlockAddrRange.first; YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ - Probe.getGuid(), Probe.getIndex(), Probe.getType()}); + Probe.getIndex(), NodeId, Offset, Probe.getType()}); + } + llvm::sort(YamlBB.PseudoProbes); + YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes), + YamlBB.PseudoProbes.end()); } YamlBF.Blocks.emplace_back(YamlBB); `````````` </details> https://github.com/llvm/llvm-project/pull/107137 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits