https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/81051
>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Thu, 8 Feb 2024 09:07:27 -0800 Subject: [PATCH 1/2] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID --- llvm/include/llvm/ProfileData/InstrProf.h | 19 +++++ llvm/lib/ProfileData/InstrProf.cpp | 87 ++++++++++++++------ llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++ 3 files changed, 138 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 53108a093bf4dd..6e799cf8aa273e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -487,8 +487,25 @@ class InstrProfSymtab { return "** External Symbol **"; } + // Returns the canonical name of the given PGOName by stripping the names + // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq." + // suffix is kept in the canonical name. + StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix); + + // Add the function into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOFuncName), PGOFuncName> + // - <MD5Hash(PGOFuncName), F> + // - <MD5Hash(getCanonicalName(PGOFuncName), F) Error addFuncWithName(Function &F, StringRef PGOFuncName); + // Add the vtable into the symbol table, by creating the following + // map entries: + // - <MD5Hash(PGOVTableName), PGOVTableName> + // - <MD5Hash(PGOVTableName), V> + // - <MD5Hash(getCanonicalName(PGOVTableName), V) + Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName); + // If the symtab is created by a series of calls to \c addFuncName, \c // finalizeSymtab needs to be called before looking up function names. // This is required because the underlying map is a vector (for space @@ -543,6 +560,7 @@ class InstrProfSymtab { Error create(const FuncNameIterRange &FuncIterRange, const VTableNameIterRange &VTableIterRange); + // Map the MD5 of the symbol name to the name. Error addSymbolName(StringRef SymbolName) { if (SymbolName.empty()) return make_error<InstrProfError>(instrprof_error::malformed, @@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; llvm::sort(MD5NameMap, less_first()); + llvm::sort(MD5VTableMap, less_first()); llvm::sort(MD5FuncMap, less_first()); llvm::sort(AddrToMD5Map, less_first()); AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 9ebcba10c860ff..a09a2bb0ba77cb 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { Types.clear(); G.getMetadata(LLVMContext::MD_type, Types); if (!Types.empty()) { - MD5VTableMap.emplace_back(G.getGUID(), &G); + if (Error E = addVTableWithName( + G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr))) + return E; } } Sorted = false; @@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { return Error::success(); } +Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, + StringRef VTablePGOName) { + if (Error E = addVTableName(VTablePGOName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable); + + // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no + // need to check ".__uniq." + + // If a local-linkage vtable is promoted to have external linkage in ThinLTO, + // it will have `.llvm.` in its name. Use the name before externalization. + StringRef CanonicalName = + getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false); + if (CanonicalName != VTablePGOName) { + if (Error E = addVTableName(CanonicalName)) + return E; + + MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable); + } + + return Error::success(); +} + /// \c NameStrings is a string composed of one of more possibly encoded /// sub-strings. The substrings are separated by 0 or more zero bytes. This /// method decodes the string and calls `NameCallback` for each substring. @@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings( std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); } -Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { - if (Error E = addFuncName(PGOFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); +StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName, + bool MayHaveUniqueSuffix) { + size_t pos = 0; // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the // stripped function name to the symbol table so that we can find a match // from profile. // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. - const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); - // Search '.' after ".__uniq." if ".__uniq." exists, otherwise - // search '.' from the beginning. - if (pos != std::string::npos) - pos += UniqSuffix.length(); - else - pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - StringRef OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) + // ".__uniq." suffix is used to differentiate internal linkage functions in + // different modules and should be kept. Now this is the only suffix with the + // pattern ".xxx" which is kept before matching, other suffixes similar as + // ".llvm." will be stripped. + if (MayHaveUniqueSuffix) { + const std::string UniqSuffix = ".__uniq."; + pos = PGOName.find(UniqSuffix); + if (pos != StringRef::npos) + pos += UniqSuffix.length(); + else + pos = 0; + } + + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from + // the beginning. + pos = PGOName.find('.', pos); + if (pos != StringRef::npos && pos != 0) + return PGOName.substr(0, pos); + + return PGOName; +} + +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + if (Error E = addFuncName(PGOFuncName)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); + + StringRef CanonicalName = + getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true); + + if (CanonicalName != PGOFuncName) { + if (Error E = addFuncName(CanonicalName)) return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); + MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F); } + return Error::success(); } diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 4b99195c1b859a..edde544660e454 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get()); Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get()); + // [ptr, ptr, ptr] + ArrayType *VTableArrayType = ArrayType::get( + PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()), + 3); + Constant *Int32TyNull = + llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx)); + SmallVector<llvm::Type *, 1> tys = {VTableArrayType}; + StructType *VTableType = llvm::StructType::get(Ctx, tys); + + // Create a vtable definition with external linkage. + GlobalVariable *ExternalGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantStruct::get( + VTableType, {llvm::ConstantArray::get( + VTableArrayType, + {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInExternalGV", M.get())})}), + "ExternalGV"); + + // Create a vtable definition for local-linkage function. + GlobalVariable *LocalGV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantStruct::get( + VTableType, + {llvm::ConstantArray::get( + VTableArrayType, {Int32TyNull, Int32TyNull, + Function::Create(FTy, Function::ExternalLinkage, + "VFuncInLocalGV", M.get())})}), + "LocalGV"); + + // Add type metadata for the test data, since vtables with type metadata are + // added to symtab. + ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV")); + LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV")); + InstrProfSymtab ProfSymtab; EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded()); @@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { EXPECT_EQ(StringRef(PGOName), PGOFuncName); EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str())); } + + StringRef VTables[] = {"ExternalGV", "LocalGV"}; + for (StringRef VTableName : VTables) { + GlobalVariable *GV = + M->getGlobalVariable(VTableName, /* AllowInternal=*/true); + + // Test that ProfSymtab returns the expected name given a hash. + std::string IRPGOName = getPGOName(*GV); + uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName); + EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID)); + EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second); + + // Test that ProfSymtab returns the expected global variable + EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID)); + } } // Testing symtab serialization and creator/deserialization interface >From 003319d8d1f791dbe0c34bf82f0043f71b330d68 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Tue, 13 Feb 2024 12:02:54 -0800 Subject: [PATCH 2/2] Use 'InstrProf::addSymbolName' to add vtables and simplify unit test --- llvm/include/llvm/ProfileData/InstrProf.h | 7 +-- llvm/lib/ProfileData/InstrProf.cpp | 7 ++- llvm/unittests/ProfileData/InstrProfTest.cpp | 50 ++++++++------------ 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index c18ffa9a5a5116..ca2260e52ff069 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -502,9 +502,10 @@ class InstrProfSymtab { // Add the vtable into the symbol table, by creating the following // map entries: - // - <MD5Hash(PGOVTableName), PGOVTableName> - // - <MD5Hash(PGOVTableName), V> - // - <MD5Hash(getCanonicalName(PGOVTableName), V) + // name-set = {PGOName} + {getCanonicalName(PGOName)} if the canonical name + // is different from pgo name. + // - In MD5NameMap: <MD5Hash(name), name> for name in name-set + // - In MD5VTableMap: <MD5Hash(name), name> for name in name-set Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName); // If the symtab is created by a series of calls to \c addFuncName, \c diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 4832370ab41f15..4fe4abeaf9041b 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -492,9 +492,12 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, StringRef VTablePGOName) { - + auto mapName = [&](StringRef Name) -> Error { - if (Error E = addVTableName(Name)) + // Use 'addSymbolName' rather than 'addVTableName' as 'VTableNames' is + // needed by in InstrProfWriter from llvm-profdata, but this function is + // called by compiler and tools with LLVM IR. + if (Error E = addSymbolName(Name)) return E; MD5VTableMap.emplace_back(GlobalValue::getGUID(Name), &VTable); return Error::success(); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index edde544660e454..775a10a1f14005 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1616,34 +1616,24 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { SmallVector<llvm::Type *, 1> tys = {VTableArrayType}; StructType *VTableType = llvm::StructType::get(Ctx, tys); - // Create a vtable definition with external linkage. - GlobalVariable *ExternalGV = new llvm::GlobalVariable( - *M, VTableType, /* isConstant= */ true, - llvm::GlobalValue::ExternalLinkage, - llvm::ConstantStruct::get( - VTableType, {llvm::ConstantArray::get( - VTableArrayType, - {Int32TyNull, Int32TyNull, - Function::Create(FTy, Function::ExternalLinkage, - "VFuncInExternalGV", M.get())})}), - "ExternalGV"); - - // Create a vtable definition for local-linkage function. - GlobalVariable *LocalGV = new llvm::GlobalVariable( - *M, VTableType, /* isConstant= */ true, - llvm::GlobalValue::InternalLinkage, - llvm::ConstantStruct::get( - VTableType, - {llvm::ConstantArray::get( - VTableArrayType, {Int32TyNull, Int32TyNull, - Function::Create(FTy, Function::ExternalLinkage, - "VFuncInLocalGV", M.get())})}), - "LocalGV"); - - // Add type metadata for the test data, since vtables with type metadata are - // added to symtab. - ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV")); - LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV")); + // Create two vtables in the module, one with external linkage and the other + // with local linkage. + for (auto [Name, Linkage] : + {std::pair{"ExternalGV", GlobalValue::ExternalLinkage}, + {"LocalGV", GlobalValue::InternalLinkage}}) { + llvm::Twine FuncName(Name, StringRef("VFunc")); + Function *VFunc = Function::Create(FTy, Linkage, FuncName, M.get()); + GlobalVariable *GV = new llvm::GlobalVariable( + *M, VTableType, /* isConstant= */ true, Linkage, + llvm::ConstantStruct::get( + VTableType, + {llvm::ConstantArray::get(VTableArrayType, + {Int32TyNull, Int32TyNull, VFunc})}), + Name); + // Add type metadata for the test data, since vtables with type metadata + // are added to symtab. + GV->addTypeMetadata(16, MDString::get(Ctx, Name)); + } InstrProfSymtab ProfSymtab; EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded()); @@ -1668,12 +1658,14 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { } StringRef VTables[] = {"ExternalGV", "LocalGV"}; - for (StringRef VTableName : VTables) { + for (auto [VTableName, PGOName] : {std::pair{"ExternalGV", "ExternalGV"}, + {"LocalGV", "MyModule.cpp;LocalGV"}}) { GlobalVariable *GV = M->getGlobalVariable(VTableName, /* AllowInternal=*/true); // Test that ProfSymtab returns the expected name given a hash. std::string IRPGOName = getPGOName(*GV); + EXPECT_STREQ(IRPGOName.c_str(), PGOName); uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName); EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID)); EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits