Author: Teresa Johnson Date: 2024-08-28T11:44:54-07:00 New Revision: 5a00383d7f192a2951e3add4d8ab1f918e7d58f8
URL: https://github.com/llvm/llvm-project/commit/5a00383d7f192a2951e3add4d8ab1f918e7d58f8 DIFF: https://github.com/llvm/llvm-project/commit/5a00383d7f192a2951e3add4d8ab1f918e7d58f8.diff LOG: Revert "Revert "[MemProf] Reduce cloning overhead by sharing nodes when possi…" This reverts commit 11aa31f595325d6b2dede3364e4b86d78fffe635. Added: Modified: llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp Removed: ################################################################################ diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 66b68d5cd457fb..c9de9c964bba0a 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -242,9 +242,16 @@ class CallsiteContextGraph { // recursion. bool Recursive = false; - // The corresponding allocation or interior call. + // The corresponding allocation or interior call. This is the primary call + // for which we have created this node. CallInfo Call; + // List of other calls that can be treated the same as the primary call + // through cloning. I.e. located in the same function and have the same + // (possibly pruned) stack ids. They will be updated the same way as the + // primary call when assigning to function clones. + std::vector<CallInfo> MatchingCalls; + // For alloc nodes this is a unique id assigned when constructed, and for // callsite stack nodes it is the original stack id when the node is // constructed from the memprof MIB metadata on the alloc nodes. Note that @@ -457,6 +464,9 @@ class CallsiteContextGraph { /// iteration. MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata; + /// Records the function each call is located in. + DenseMap<CallInfo, const FuncTy *> CallToFunc; + /// Map from callsite node to the enclosing caller function. std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; @@ -474,7 +484,8 @@ class CallsiteContextGraph { /// StackIdToMatchingCalls map. void assignStackNodesPostOrder( ContextNode *Node, DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); + DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall); /// Duplicates the given set of context ids, updating the provided /// map from each original id with the newly generated context ids, @@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node, template <typename DerivedCCG, typename FuncTy, typename CallTy> void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet<const ContextNode *> &Visited, - DenseMap<uint64_t, std::vector<CallContextInfo>> - &StackIdToMatchingCalls) { + assignStackNodesPostOrder( + ContextNode *Node, DenseSet<const ContextNode *> &Visited, + DenseMap<uint64_t, std::vector<CallContextInfo>> + &StackIdToMatchingCalls, + DenseMap<CallInfo, CallInfo> &CallToMatchingCall) { auto Inserted = Visited.insert(Node); if (!Inserted.second) return; @@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: // Skip any that have been removed during the recursion. if (!Edge) continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls, + CallToMatchingCall); } // If this node's stack id is in the map, update the graph to contain new @@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; // Skip any for which we didn't assign any ids, these don't get a node in // the graph. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + // If this call has a matching call (located in the same function and + // having the same stack ids), simply add it to the context node created + // for its matching call earlier. These can be treated the same through + // cloning and get updated at the same time. + if (!CallToMatchingCall.contains(Call)) + continue; + auto MatchingCall = CallToMatchingCall[Call]; + assert(NonAllocationCallToContextNodeMap.contains(MatchingCall)); + NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back( + Call); continue; + } assert(LastId == Ids.back()); @@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // there is more than one call with the same stack ids. Their (possibly newly // duplicated) context ids are saved in the StackIdToMatchingCalls map. DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; + // Save a map from each call to any that are found to match it. I.e. located + // in the same function and have the same (possibly pruned) stack ids. We use + // this to avoid creating extra graph nodes as they can be treated the same. + DenseMap<CallInfo, CallInfo> CallToMatchingCall; for (auto &It : StackIdToMatchingCalls) { auto &Calls = It.getSecond(); // Skip single calls with a single stack id. These don't need a new node. @@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds(); assert(!LastNodeContextIds.empty()); + // Map from function to the first call from the below list (with matching + // stack ids) found in that function. Note that calls from diff erent + // functions can have the same stack ids because this is the list of stack + // ids that had (possibly pruned) nodes after building the graph from the + // allocation MIBs. + DenseMap<const FuncTy *, CallInfo> FuncToCallMap; + for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; assert(SavedContextIds.empty()); @@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { continue; } + const FuncTy *CallFunc = CallToFunc[Call]; + + // If the prior call had the same stack ids this map would not be empty. + // Check if we already have a call that "matches" because it is located + // in the same function. + if (FuncToCallMap.contains(CallFunc)) { + // Record the matching call found for this call, and skip it. We + // will subsequently combine it into the same node. + CallToMatchingCall[Call] = FuncToCallMap[CallFunc]; + continue; + } + // Check if the next set of stack ids is the same (since the Calls vector // of tuples is sorted by the stack ids we can just look at the next one). bool DuplicateContextIds = false; @@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { set_subtract(LastNodeContextIds, StackSequenceContextIds); if (LastNodeContextIds.empty()) break; - } + // No longer possibly in a sequence of calls with duplicate stack ids, + // clear the map. + FuncToCallMap.clear(); + } else + // Record the call with its function, so we can locate it the next time + // we find a call from this function when processing the calls with the + // same stack ids. + FuncToCallMap[CallFunc] = Call; } } @@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { // associated context ids over to the new nodes. DenseSet<const ContextNode *> Visited; for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls, + CallToMatchingCall); if (VerifyCCG) check(); } @@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( continue; if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; auto *AllocNode = addAllocNode(&I, &F); auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); assert(CallsiteMD); @@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( I.setMetadata(LLVMContext::MD_callsite, nullptr); } // For callsite metadata, add to list for this function for later use. - else if (I.getMetadata(LLVMContext::MD_callsite)) + else if (I.getMetadata(LLVMContext::MD_callsite)) { CallsWithMetadata.push_back(&I); + CallToFunc[&I] = &F; + } } } if (!CallsWithMetadata.empty()) @@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( // correlate properly in applyImport in the backends. if (AN.MIBs.empty()) continue; - CallsWithMetadata.push_back({&AN}); - auto *AllocNode = addAllocNode({&AN}, FS); + IndexCall AllocCall(&AN); + CallsWithMetadata.push_back(AllocCall); + CallToFunc[AllocCall] = FS; + auto *AllocNode = addAllocNode(AllocCall, FS); // Pass an empty CallStack to the CallsiteContext (second) // parameter, since for ThinLTO we already collapsed out the inlined // stack ids on the allocation call during ModuleSummaryAnalysis. @@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( } // For callsite metadata, add to list for this function for later use. if (!FS->callsites().empty()) - for (auto &SN : FS->mutableCallsites()) - CallsWithMetadata.push_back({&SN}); + for (auto &SN : FS->mutableCallsites()) { + IndexCall StackNodeCall(&SN); + CallsWithMetadata.push_back(StackNodeCall); + CallToFunc[StackNodeCall] = FS; + } if (!CallsWithMetadata.empty()) FuncToCallsWithMetadata[FS] = CallsWithMetadata; @@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( if (Recursive) OS << " (recursive)"; OS << "\n"; + if (!MatchingCalls.empty()) { + OS << "\tMatchingCalls:\n"; + for (auto &MatchingCall : MatchingCalls) { + OS << "\t"; + MatchingCall.print(OS); + OS << "\n"; + } + } OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; OS << "\tContextIds:"; // Make a copy of the computed context ids that we can sort for stability. @@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone( std::make_unique<ContextNode>(Node->IsAllocation, Node->Call)); ContextNode *Clone = NodeOwner.back().get(); Node->addClone(Clone); + Clone->MatchingCalls = Node->MatchingCalls; assert(NodeToCallingFunc.count(Node)); NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, @@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (CallMap.count(Call)) CallClone = CallMap[Call]; CallsiteClone->setCall(CallClone); + // Need to do the same for all matching calls. + for (auto &MatchingCall : Node->MatchingCalls) { + CallInfo CallClone(MatchingCall); + if (CallMap.count(MatchingCall)) + CallClone = CallMap[MatchingCall]; + // Updates the call in the list. + MatchingCall = CallClone; + } }; // Keep track of the clones of callsite Node that need to be assigned to @@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { CallInfo NewCall(CallMap[OrigCall]); assert(NewCall); NewClone->setCall(NewCall); + // Need to do the same for all matching calls. + for (auto &MatchingCall : NewClone->MatchingCalls) { + CallInfo OrigMatchingCall(MatchingCall); + OrigMatchingCall.setCloneNo(0); + assert(CallMap.count(OrigMatchingCall)); + CallInfo NewCall(CallMap[OrigMatchingCall]); + assert(NewCall); + // Updates the call in the list. + MatchingCall = NewCall; + } } } // Fall through to handling below to perform the recording of the @@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { if (Node->IsAllocation) { updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes)); + assert(Node->MatchingCalls.empty()); return; } @@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node]; updateCall(Node->Call, CalleeFunc); + // Update all the matching calls as well. + for (auto &Call : Node->MatchingCalls) + updateCall(Call, CalleeFunc); }; // Performs DFS traversal starting from allocation nodes to update calls to _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits