https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/115750
>From 70dcb2ccba98b392c3539f349ccf7fec284a674c Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Mon, 11 Nov 2024 10:06:56 -0800 Subject: [PATCH 1/2] [CGData] Refactor Global Merge Functions --- llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 148 +++++++++------------- 1 file changed, 59 insertions(+), 89 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 2b367ca87d9008..df8dbb8a73b95d 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -31,14 +31,6 @@ static cl::opt<bool> DisableCGDataForMerging( "merging is still enabled within a module."), cl::init(false)); -STATISTIC(NumMismatchedFunctionHash, - "Number of mismatched function hash for global merge function"); -STATISTIC(NumMismatchedInstCount, - "Number of mismatched instruction count for global merge function"); -STATISTIC(NumMismatchedConstHash, - "Number of mismatched const hash for global merge function"); -STATISTIC(NumMismatchedModuleId, - "Number of mismatched Module Id for global merge function"); STATISTIC(NumMergedFunctions, "Number of functions that are actually merged using function hash"); STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed"); @@ -203,9 +195,9 @@ void GlobalMergeFunc::analyze(Module &M) { struct FuncMergeInfo { StableFunctionMap::StableFunctionEntry *SF; Function *F; - std::unique_ptr<IndexInstrMap> IndexInstruction; + IndexInstrMap *IndexInstruction; FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F, - std::unique_ptr<IndexInstrMap> IndexInstruction) + IndexInstrMap *IndexInstruction) : SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {} }; @@ -420,101 +412,79 @@ static ParamLocsVecTy computeParamInfo( bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { bool Changed = false; - // Build a map from stable function name to function. - StringMap<Function *> StableNameToFuncMap; - for (auto &F : M) - StableNameToFuncMap[get_stable_name(F.getName())] = &F; - // Track merged functions - DenseSet<Function *> MergedFunctions; - - auto ModId = M.getModuleIdentifier(); - for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) { - // Parameter locations based on the unique hash sequences - // across the candidates. + // Collect stable functions related to the current module. + DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs; + DenseMap<Function *, FunctionHashInfo> FuncToFI; + auto &Maps = FunctionMap->getFunctionMap(); + for (auto &F : M) { + if (!isEligibleFunction(&F)) + continue; + auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp); + if (Maps.contains(FI.FunctionHash)) { + HashToFuncs[FI.FunctionHash].push_back(&F); + FuncToFI.try_emplace(&F, std::move(FI)); + } + } + + for (auto &[Hash, Funcs] : HashToFuncs) { std::optional<ParamLocsVecTy> ParamLocsVec; - Function *MergedFunc = nullptr; - std::string MergedModId; SmallVector<FuncMergeInfo> FuncMergeInfos; - for (auto &SF : SFS) { - // Get the function from the stable name. - auto I = StableNameToFuncMap.find( - *FunctionMap->getNameForId(SF->FunctionNameId)); - if (I == StableNameToFuncMap.end()) - continue; - Function *F = I->second; - assert(F); - // Skip if the function has been merged before. - if (MergedFunctions.count(F)) - continue; - // Consider the function if it is eligible for merging. - if (!isEligibleFunction(F)) - continue; - auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp); - uint64_t FuncHash = FI.FunctionHash; - if (Hash != FuncHash) { - ++NumMismatchedFunctionHash; - continue; - } + // Iterate functions with the same hash. + for (auto &F : Funcs) { + auto &SFS = Maps.at(Hash); + auto &FI = FuncToFI.at(F); - if (SF->InstCount != FI.IndexInstruction->size()) { - ++NumMismatchedInstCount; + // Check if the function is compatible with any stable function + // in terms of the number of instructions and ignored operands. + assert(!SFS.empty()); + auto &RFS = SFS[0]; + if (RFS->InstCount != FI.IndexInstruction->size()) continue; - } - bool HasValidSharedConst = true; - for (auto &[Index, Hash] : *SF->IndexOperandHashMap) { - auto [InstIndex, OpndIndex] = Index; - assert(InstIndex < FI.IndexInstruction->size()); - auto *Inst = FI.IndexInstruction->lookup(InstIndex); - if (!ignoreOp(Inst, OpndIndex)) { - HasValidSharedConst = false; - break; - } - } - if (!HasValidSharedConst) { - ++NumMismatchedConstHash; - continue; - } - if (!checkConstHashCompatible(*SF->IndexOperandHashMap, - *FI.IndexOperandHashMap)) { - ++NumMismatchedConstHash; - continue; - } - if (!ParamLocsVec.has_value()) { - ParamLocsVec = computeParamInfo(SFS); - LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash - << " with Params " << ParamLocsVec->size() << "\n"); - } - if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction, - *ParamLocsVec)) { - ++NumMismatchedConstHash; + + auto hasValidSharedConst = + [&](StableFunctionMap::StableFunctionEntry *SF) { + for (auto &[Index, Hash] : *SF->IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = Index; + assert(InstIndex < FI.IndexInstruction->size()); + auto *Inst = FI.IndexInstruction->lookup(InstIndex); + if (!ignoreOp(Inst, OpndIndex)) + return false; + } + return true; + }; + if (!hasValidSharedConst(RFS.get())) continue; - } - if (MergedFunc) { - // Check if the matched functions fall into the same (first) module. - // This module check is not strictly necessary as the functions can move - // around. We just want to avoid merging functions from different - // modules than the first one in the function map, as they may not end - // up with being ICFed by the linker. - if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) { - ++NumMismatchedModuleId; + for (auto &SF : SFS) { + assert(SF->InstCount == FI.IndexInstruction->size()); + assert(hasValidSharedConst(SF.get())); + // Check if there is any stable function that is compatiable with the + // current one. + if (!checkConstHashCompatible(*SF->IndexOperandHashMap, + *FI.IndexOperandHashMap)) continue; + if (!ParamLocsVec.has_value()) { + ParamLocsVec = computeParamInfo(SFS); + LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash + << " with Params " << ParamLocsVec->size() << "\n"); } - } else { - MergedFunc = F; - MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId); - } + if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction, + *ParamLocsVec)) + continue; - FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction)); - MergedFunctions.insert(F); + // As long as we found one stable function matching the current one, + // we create a candidate for merging and move on to the next function. + FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get()); + break; + } } unsigned FuncMergeInfoSize = FuncMergeInfos.size(); if (FuncMergeInfoSize == 0) continue; LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count " - << FuncMergeInfoSize << " in " << ModId << "\n"); + << FuncMergeInfoSize << " for hash: " << Hash << "\n"); for (auto &FMI : FuncMergeInfos) { Changed = true; >From e09f9c0e839b9e6f8254599956f998f4880f8ebf Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Mon, 11 Nov 2024 21:23:58 -0800 Subject: [PATCH 2/2] Address comments from nocchijiang --- llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 24 ++++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index df8dbb8a73b95d..15f70395809a73 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -413,32 +413,28 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { bool Changed = false; // Collect stable functions related to the current module. - DenseMap<stable_hash, SmallVector<Function *>> HashToFuncs; - DenseMap<Function *, FunctionHashInfo> FuncToFI; + DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>> + HashToFuncs; auto &Maps = FunctionMap->getFunctionMap(); for (auto &F : M) { if (!isEligibleFunction(&F)) continue; auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp); - if (Maps.contains(FI.FunctionHash)) { - HashToFuncs[FI.FunctionHash].push_back(&F); - FuncToFI.try_emplace(&F, std::move(FI)); - } + if (Maps.contains(FI.FunctionHash)) + HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI)); } for (auto &[Hash, Funcs] : HashToFuncs) { std::optional<ParamLocsVecTy> ParamLocsVec; SmallVector<FuncMergeInfo> FuncMergeInfos; + assert(!SFS.empty()); + auto &SFS = Maps.at(Hash); + auto &RFS = SFS[0]; // Iterate functions with the same hash. - for (auto &F : Funcs) { - auto &SFS = Maps.at(Hash); - auto &FI = FuncToFI.at(F); - + for (auto &[F, FI] : Funcs) { // Check if the function is compatible with any stable function // in terms of the number of instructions and ignored operands. - assert(!SFS.empty()); - auto &RFS = SFS[0]; if (RFS->InstCount != FI.IndexInstruction->size()) continue; @@ -473,8 +469,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) { *ParamLocsVec)) continue; - // As long as we found one stable function matching the current one, - // we create a candidate for merging and move on to the next function. + // If a stable function matching the current one is found, + // create a candidate for merging and proceed to the next function. FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get()); break; } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits