https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/81051
>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Thu, 8 Feb 2024 09:07:27 -0800 Subject: [PATCH] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID --- llvm/include/llvm/ProfileData/InstrProf.h | 19 +++++ llvm/lib/ProfileData/InstrProf.cpp | 87 ++++++++++++++------ llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++ 3 files changed, 138 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 53108a093bf4dd..6e799cf8aa273e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -487,8 +487,25 @@ class InstrProfSymtab { return "** External Symbol **"; } + // Returns the canonical name of the given PGOName by stripping the names + // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq." + // suffix is kept in the canonical name. + StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix); + + // Add the function into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOFuncName), PGOFuncName> + // - <MD5Hash(PGOFuncName), F> + // - <MD5Hash(getCanonicalName(PGOFuncName), F) Error addFuncWithName(Function &F, StringRef PGOFuncName); + // Add the vtable into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOVTableName), PGOVTableName> + // - <MD5Hash(PGOVTableName), V> + // - <MD5Hash(getCanonicalName(PGOVTableName), V) + Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName); + // If the symtab is created by a series of calls to \c addFuncName, \c // finalizeSymtab needs to be called before looking up function names. // This is required because the underlying map is a vector (for space @@ -543,6 +560,7 @@ class InstrProfSymtab { Error create(const FuncNameIterRange &FuncIterRange, const VTableNameIterRange &VTableIterRange); + // Map the MD5 of the symbol name to the name. Error addSymbolName(StringRef SymbolName) { if (SymbolName.empty()) return make_error<InstrProfError>(instrprof_error::malformed, @@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; llvm::sort(MD5NameMap, less_first()); + llvm::sort(MD5VTableMap, less_first()); llvm::sort(MD5FuncMap, less_first()); llvm::sort(AddrToMD5Map, less_first()); AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 9ebcba10c860ff..a09a2bb0ba77cb 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { Types.clear(); G.getMetadata(LLVMContext::MD_type, Types); if (!Types.empty()) { - MD5VTableMap.emplace_back(G.getGUID(), &G); + if (Error E = addVTableWithName( + G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr))) + return E; } } Sorted = false; @@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { return Error::success(); } +Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, + StringRef VTablePGOName) { + if (Error E = addVTableName(VTablePGOName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable); + + // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no + // need to check ".__uniq." + + // If a local-linkage vtable is promoted to have external linkage in ThinLTO, + // it will have `.llvm.` in its name. Use the name before externalization. + StringRef CanonicalName = + getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false); + if (CanonicalName != VTablePGOName) { + if (Error E = addVTableName(CanonicalName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable); + } + + return Error::success(); +} + /// \c NameStrings is a string composed of one of more possibly encoded /// sub-strings. The substrings are separated by 0 or more zero bytes. This /// method decodes the string and calls `NameCallback` for each substring. @@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings( std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); } -Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { - if (Error E = addFuncName(PGOFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); +StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName, + bool MayHaveUniqueSuffix) { + size_t pos = 0; // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the // stripped function name to the symbol table so that we can find a match // from profile. // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. - const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); - // Search '.' after ".__uniq." if ".__uniq." exists, otherwise - // search '.' from the beginning. - if (pos != std::string::npos) - pos += UniqSuffix.length(); - else - pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - StringRef OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) + // ".__uniq." suffix is used to differentiate internal linkage functions in + // different modules and should be kept. Now this is the only suffix with the + // pattern ".xxx" which is kept before matching, other suffixes similar as + // ".llvm." will be stripped. + if (MayHaveUniqueSuffix) { + const std::string UniqSuffix = ".__uniq."; + pos = PGOName.find(UniqSuffix); + if (pos != StringRef::npos) + pos += UniqSuffix.length(); + else + pos = 0; + } + + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from + // the beginning. + pos = PGOName.find('.', pos); + if (pos != StringRef::npos && pos != 0) + return PGOName.substr(0, pos); + + return PGOName; +} + +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + if (Error E = addFuncName(PGOFuncName)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); + + StringRef CanonicalName = + getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true); + + if (CanonicalName != PGOFuncName) { + if (Error E = addFuncName(CanonicalName)) return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); + MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F); } + return Error::success(); } diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 4b99195c1b859a..edde544660e454 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get()); Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get()); + // [ptr, ptr, ptr] + ArrayType *VTableArrayType = ArrayType::get( + PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()), + 3); + Constant *Int32TyNull = + llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx)); + SmallVector<llvm::Type *, 1> tys = {VTableArrayType}; + StructType *VTableType = llvm::StructType::get(Ctx, tys); + + // Create a vtable definition with external linkage. + GlobalVariable *ExternalGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantStruct::get( + VTableType, {llvm::ConstantArray::get( + VTableArrayType, + {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInExternalGV", M.get())})}), + "ExternalGV"); + + // Create a vtable definition for local-linkage function. + GlobalVariable *LocalGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantStruct::get( + VTableType, + {llvm::ConstantArray::get( + VTableArrayType, {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInLocalGV", M.get())})}), + "LocalGV"); + + // Add type metadata for the test data, since vtables with type metadata are + // added to symtab. + ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV")); + LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV")); + InstrProfSymtab ProfSymtab; EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded()); @@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { EXPECT_EQ(StringRef(PGOName), PGOFuncName); EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str())); } + + StringRef VTables[] = {"ExternalGV", "LocalGV"}; + for (StringRef VTableName : VTables) { + GlobalVariable *GV = + M->getGlobalVariable(VTableName, /* AllowInternal=*/true); + + // Test that ProfSymtab returns the expected name given a hash. + std::string IRPGOName = getPGOName(*GV); + uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName); + EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID)); + EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second); + + // Test that ProfSymtab returns the expected global variable + EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID)); + } } // Testing symtab serialization and creator/deserialization interface _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits