[clang] [clang-tools-extra] [compiler-rt] [lldb] [llvm] [mlir] [openmp] [polly] fix(python): fix comparison to None (PR #91857)
https://github.com/teresajohnson approved this pull request. compiler-rt changes lgtm https://github.com/llvm/llvm-project/pull/91857 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][fat-lto-objects] Make module flags match non-FatLTO pipelines (PR #83159)
@@ -1036,7 +1041,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) MPM.addPass(VerifierPass()); - if (Action == Backend_EmitBC || Action == Backend_EmitLL) { + if (Action == Backend_EmitBC || Action == Backend_EmitLL || + CodeGenOpts.FatLTO) { teresajohnson wrote: I see, I guess then the check for the CodeGenOpt is needed here to catch this case. https://github.com/llvm/llvm-project/pull/83159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][fat-lto-objects] Make module flags match non-FatLTO pipelines (PR #83159)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/83159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][fat-lto-objects] Make module flags match non-FatLTO pipelines (PR #83159)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/83159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][fat-lto-objects] Make module flags match non-FatLTO pipelines (PR #83159)
@@ -1036,7 +1041,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) MPM.addPass(VerifierPass()); - if (Action == Backend_EmitBC || Action == Backend_EmitLL) { + if (Action == Backend_EmitBC || Action == Backend_EmitLL || + CodeGenOpts.FatLTO) { teresajohnson wrote: What is the Action type with FatLTO? https://github.com/llvm/llvm-project/pull/83159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [LTO] Fix fat-lto output for -c -emit-llvm. (PR #79404)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/79404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxx] [libc] [clang] [lld] [clang-tools-extra] [flang] [compiler-rt] [llvm] [ELF] --save-temps --lto-emit-asm: derive ELF/asm file names from bitcode file names (PR #78835)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/78835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lld] [clang-tools-extra] [llvm] [clang] [ELF] --save-temps --lto-emit-asm: derive ELF/asm file names from bitcode file names (PR #78835)
@@ -352,32 +357,49 @@ std::vector BitcodeCompiler::compile() { pruneCache(config->thinLTOCacheDir, config->thinLTOCachePolicy, files); if (!config->ltoObjPath.empty()) { -saveBuffer(buf[0], config->ltoObjPath); +saveBuffer(buf[0].second, config->ltoObjPath); for (unsigned i = 1; i != maxTasks; ++i) - saveBuffer(buf[i], config->ltoObjPath + Twine(i)); - } - - if (config->saveTempsArgs.contains("prelink")) { -if (!buf[0].empty()) - saveBuffer(buf[0], config->outputFile + ".lto.o"); -for (unsigned i = 1; i != maxTasks; ++i) - saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); - } - - if (config->ltoEmitAsm) { -saveBuffer(buf[0], config->outputFile); -for (unsigned i = 1; i != maxTasks; ++i) - saveBuffer(buf[i], config->outputFile + Twine(i)); -return {}; + saveBuffer(buf[i].second, config->ltoObjPath + Twine(i)); } + bool savePrelink = config->saveTempsArgs.contains("prelink"); std::vector ret; - for (unsigned i = 0; i != maxTasks; ++i) -if (!buf[i].empty()) - ret.push_back(createObjFile(MemoryBufferRef(buf[i], "lto.tmp"))); + const char *ext = config->ltoEmitAsm ? ".s" : ".o"; + for (unsigned i = 0; i != maxTasks; ++i) { +StringRef bitcodeFilePath; +StringRef objBuf; +if (files[i]) { teresajohnson wrote: Thanks, those are helpful. Can you also add a comment before 373 on meaning of files[i] being null vs not - reading through the comments you added it sounds like files[i] is non-null when object file was found in the cache? https://github.com/llvm/llvm-project/pull/78835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lld] [clang-tools-extra] [llvm] [clang] [ELF] --save-temps --lto-emit-asm: derive ELF/asm file names from bitcode file names (PR #78835)
@@ -53,10 +53,10 @@ ; RUN: rm -fr cache && mkdir cache ; RUN: ld.lld --thinlto-cache-dir=cache --save-temps -o out b.bc a.bc -M | FileCheck %s --check-prefix=MAP -; RUN: ls out1.lto.o a.bc.0.preopt.bc b.bc.0.preopt.bc +; RUN: ls out.lto.a.o a.bc.0.preopt.bc b.bc.0.preopt.bc -; MAP: llvmcache-{{.*}}:(.text) -; MAP: llvmcache-{{.*}}:(.text) +; MAP: out.lto.b.o:(.text) teresajohnson wrote: Confused about what changed in the latest commit to cause these lines to need updating - was this just a missed test update in the prior commit? https://github.com/llvm/llvm-project/pull/78835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [lld] [clang-tools-extra] [clang] [ELF] --save-temps --lto-emit-asm: derive ELF/asm file names from bitcode file names (PR #78835)
@@ -46,8 +46,9 @@ class BitcodeCompiler { private: std::unique_ptr ltoObj; - std::vector> buf; + SmallVector>, 0> buf; teresajohnson wrote: ping on this comment. https://github.com/llvm/llvm-project/pull/78835 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [compiler-rt] [PGO] Exposing PGO's Counter Reset and File Dumping APIs (PR #76471)
teresajohnson wrote: > I realized one problem during testing IRPGO (thanks again for the suggestion > @minglotus-6 !). > > A function's control flow may change between `-fprofile-generate` and > `-fprofile-use` when we make use of definitions in the new header. For > example, one may have the following code: > > ```c > #include "profile/instr_prof_interface.h" > > void main() { >... > if (__llvm_profile_dump()) > return error; > > cleanup(); > } > ``` > > During `-fprofile-generate`, `__llvm_profile_dump` is a declared name and > main's control flow includes a branch to `return error;`. During > `-fprofile-use`, `__llvm_profile_dump()` is replaced by `(0)` and the > frontend eliminates the `if` statement and the branch to `return error`. Such > control flow change can lead to PGO warnings (hash mismatch). > > I think it may be OK to keep the PR this way because the new macros can > potentially cause control flow changes directly as well. The documentation is > updated > (https://github.com/llvm/llvm-project/pull/76471/files#diff-7389be311daf0b9b476c876bef04245fa3c0ad9337ce865682174bd77d53b648R2908) > to advise against using these APIs in a program's hot regions and warn about > possible impact on control flow. > > Do you all think this is reasonable? That's probably ok, as it doesn't make sense to do dumping etc in a hot region of code anyway. An alternate suggestion is to make these functions real functions that simply return 0 instead of #defined to 0. But that may not avoid the issue described above because early inlining will likely inline and simplify the code before IR PGO matching anyway. https://github.com/llvm/llvm-project/pull/76471 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [compiler-rt] [clang] [PGO] Exposing PGO's Counter Reset and File Dumping APIs (PR #76471)
teresajohnson wrote: > @teresajohnson I mentioned the same thing on > [discourse](https://discourse.llvm.org/t/pgo-are-the-llvm-profile-functions-stable-c-apis-across-llvm-releases/75832/5) > but it seems like linking on AIX does not support this model. I see. Perhaps instead of defining these away completely, they should be defined to a dummy function with the same signature that always returns success (0 I think)? Alternatively, not define them at all when __LLVM_INSTR_PROFILE_GENERATE not defined and have the user guard calls by `#ifdef __LLVM_INSTR_PROFILE_GENERATE`. Otherwise, the user either cannot check the return values, or they have to guard their usage by a check of __LLVM_INSTR_PROFILE_GENERATE anyway. https://github.com/llvm/llvm-project/pull/76471 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [clang-tools-extra] [PGO] Exposing PGO's Counter Reset and File Dumping APIs (PR #76471)
teresajohnson wrote: The way we have done this in the past is to declare these as weak symbols and check if they exist before calling. E.g.: ``` extern "C" __attribute__((weak)) int __llvm_profile_dump(void); if (__llvm_profile_dump) if (__llvm_profile_dump() != 0) { ... ``` Not necessarily opposed to adding the macros etc, but the advantage of the weak symbol approach vs what you have here is that the user code can check the return values. https://github.com/llvm/llvm-project/pull/76471 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] DiagnosticHandler: refactor error checking (PR #75889)
https://github.com/teresajohnson approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/75889 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] DiagnosticHandler: refactor error checking (PR #75889)
@@ -256,10 +256,13 @@ void LLVMContext::diagnose(const DiagnosticInfo ) { RS->emit(*OptDiagBase); // If there is a report handler, use it. - if (pImpl->DiagHandler && - (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) && - pImpl->DiagHandler->handleDiagnostics(DI)) -return; + if (pImpl->DiagHandler) { +if (DI.getSeverity() == DS_Error) + pImpl->DiagHandler->HasErrors = true; teresajohnson wrote: I didn't mean calling handle Diagnostics in more places. I just meant rather than directly setting the HasErrors flags here, do that in a new non-virtual method (e.g. prepareToHandleDiagnostics() or whatever), and call it here just before calling handle Diagnostics. To abstract away what it is actually doing and leave the setting of the flag to the DiagnosticHandler class. https://github.com/llvm/llvm-project/pull/75889 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] DiagnosticHandler: refactor error checking (PR #75889)
@@ -256,10 +256,13 @@ void LLVMContext::diagnose(const DiagnosticInfo ) { RS->emit(*OptDiagBase); // If there is a report handler, use it. - if (pImpl->DiagHandler && - (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) && - pImpl->DiagHandler->handleDiagnostics(DI)) -return; + if (pImpl->DiagHandler) { +if (DI.getSeverity() == DS_Error) + pImpl->DiagHandler->HasErrors = true; teresajohnson wrote: Perhaps move the set of HasErrors into a new non-virtual method that we call below just before calling handleDiagnostics. Also, should this be set when handleDiagnostics is not called? https://github.com/llvm/llvm-project/pull/75889 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lld] [llvm] [clang] [LTO] Improve diagnostics handling when parsing module-level inline assembly (PR #75726)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/75726 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Disable PGO instrumentation on naked function (PR #75224)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/75224 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Disable PGO instrumentation on naked function (PR #75224)
@@ -892,6 +892,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, } } + if (FD->hasAttr()) { teresajohnson wrote: Is this change needed given the separate change in LLVM skipPGOGen? https://github.com/llvm/llvm-project/pull/75224 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [clang-tools-extra] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
https://github.com/teresajohnson approved this pull request. LGTM other than a couple of minor comments and pending resolution of the LLVM IR test. Thanks! https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [clang-tools-extra] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -352,6 +366,8 @@ std::string getIRPGOFuncName(const Function , bool InLTO) { return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F)); } +// DEPRECATED. Use `getIRPGOFuncName`for new code. See that function for teresajohnson wrote: In the header it is described as for FE instrumentation. Probably want the comments to be consistent? https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang-tools-extra] [clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -0,0 +1,115 @@ +// This is a regression test for ThinLTO indirect-call-promotion when candidate +// callees need to be imported from another IR module. In the C++ test case, +// `main` calls `global_func` which is defined in another module. `global_func` +// has two indirect callees, one has external linkage and one has local linkage. +// All three functions should be imported into the IR module of main. + +// What the test does: +// - Generate raw profiles from executables and convert it to indexed profiles. +// During the conversion, a profiled callee address in raw profiles will be +// converted to function hash in indexed profiles. +// - Run IRPGO profile use and ThinTLO prelink pipeline and get LLVM bitcodes +// for both cpp files in the C++ test case. +// - Generate ThinLTO summary file with LLVM bitcodes, and run `function-import` pass. +// - Run `pgo-icall-prom` pass for the IR module which needs to import callees. + +// Use lld as linker for more robust test. We need to REQUIRE LLVMgold.so for +// LTO if default linker is GNU ld or gold anyway. +// REQUIRES: lld-available + +// Test should fail where linkage-name and mangled-name diverges, see issue https://github.com/llvm/llvm-project/issues/74565). +// Currently, this name divergence happens on Mach-O object file format, or on +// many (but not all) 32-bit Windows systems. +// +// XFAIL: system-darwin +// +// Mark 32-bit Windows as UNSUPPORTED for now as opposed to XFAIL. This test +// should fail on many (but not all) 32-bit Windows systems and succeed on the +// rest. The flexibility in triple string parsing makes it tricky to capture +// both sets accurately. i[3-9]86 specifies arch as Triple::ArchType::x86, (win32|windows) +// specifies OS as Triple::OS::Win32 +// +// UNSUPPORTED: target={{i[3-9]86-.*-(win32|windows).*}} + +// RUN: rm -rf %t && split-file %s %t && cd %t + +// Do setup work for all below tests. +// Generate raw profiles from real programs and convert it into indexed profiles. +// Use clangxx_pgogen for IR level instrumentation for C++. +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 lib.cpp main.cpp -o main +// RUN: env LLVM_PROFILE_FILE=main.profraw %run ./main +// RUN: llvm-profdata merge main.profraw -o main.profdata + +// Use profile on lib and get bitcode, test that local function callee0 has +// expected !PGOFuncName metadata and external function callee1 doesn't have +// !PGOFuncName metadata. Explicitly skip ICP pass to test ICP happens as +// expected in the IR module that imports functions from lib. +// RUN: %clang -mllvm -disable-icp -fprofile-use=main.profdata -flto=thin -O2 -c lib.cpp -o lib.bc +// RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=PGOName + +// Use profile on main and get bitcode. +// RUN: %clang -fprofile-use=main.profdata -flto=thin -O2 -c main.cpp -o main.bc + +// Run llvm-lto to get summary file. +// RUN: llvm-lto -thinlto -o summary main.bc lib.bc + +// Test the imports of functions. Default import thresholds would work but do +// explicit override to be more futureproof. Note all functions have one basic +// block with a function-entry-count of one, so they are actually hot functions +// per default profile summary hotness cutoff. +// RUN: opt -passes=function-import -import-instr-limit=100 -import-cold-multiplier=1 -summary-file summary.thinlto.bc main.bc -o main.import.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +// Test that '_Z11global_funcv' has indirect calls annotated with value profiles. +// RUN: llvm-dis main.import.bc -o - | FileCheck %s --check-prefix=IR + +// Test that both candidates are ICP'ed and there is no `!VP` in the IR. +// RUN: opt main.import.bc -icp-lto -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=ICP-IR,ICP-REMARK --implicit-check-not="!VP" + +// IMPORTS: main.cpp: Import _Z7callee1v +// IMPORTS: main.cpp: Import _ZL7callee0v.llvm.[[#]] +// IMPORTS: main.cpp: Import _Z11global_funcv + +// PGOName: define dso_local void @_Z7callee1v() {{.*}} !prof ![[#]] { +// PGOName: define internal void @_ZL7callee0v() {{.*}} !prof ![[#]] !PGOFuncName ![[#MD:]] { +// PGOName: ![[#MD]] = !{!"{{.*}}lib.cpp;_ZL7callee0v"} + +// IR-LABEL: define available_externally {{.*}} void @_Z11global_funcv() {{.*}} !prof ![[#]] { +// IR-NEXT: entry: +// IR-NEXT: %0 = load ptr, ptr @calleeAddrs +// IR-NEXT: tail call void %0(), !prof ![[#PROF1:]] +// IR-NEXT: %1 = load ptr, ptr getelementptr inbounds ([2 x ptr], ptr @calleeAddrs, +// IR-NEXT: tail call void %1(), !prof ![[#PROF2:]] + +// The GUID of indirect callee is the MD5 hash of `/path/to/lib.cpp:_ZL7callee0v` teresajohnson wrote: should this have the semicolon separator not a colon? https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang-tools-extra] [clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -1,39 +0,0 @@ -; Do setup work for all below tests: generate bitcode and combined index teresajohnson wrote: Without use of the raw profile, this test would not have caught the regression. If we think the new compiler-rt test is enough to catch this case in the future, then perhaps we don't need this LLVM test at all (there should already be some ICP tests). But if the compiler-rt test is not enough, because some people don't run those, then this should stay and use a raw profile as input. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][fatlto] Don't set ThinLTO module flag with FatLTO (PR #75079)
teresajohnson wrote: Added a comment to that issue, I think it would be good to understand why unified LTO is not expected in that case (for the assertion). https://github.com/llvm/llvm-project/pull/75079 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang-tools-extra] [llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -1,39 +1,45 @@ -; Do setup work for all below tests: generate bitcode and combined index -; RUN: opt -module-summary %s -o %t.bc -; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc +; The raw profiles and reduced IR inputs are generated from Inputs/update_icall_promotion_inputs.sh teresajohnson wrote: The reason for having this in a single test this way is specifically to detect mismatches in the separator used in the PGO name and in the ThinLTO index. For this we need to convert from raw profile and feed it through ThinLTO+ICP. Having separate tests would not do this. I.e. with the change from ':' to ';', the compiler-rt test would presumably have failed and would be updated, but the test using the proftext input would not fail. The lack of testing these pieces together led to the prior separator rename not updating all the necessary compiler bits. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang-tools-extra] [clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -1,39 +1,45 @@ -; Do setup work for all below tests: generate bitcode and combined index -; RUN: opt -module-summary %s -o %t.bc -; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc +; The raw profiles and reduced IR inputs are generated from Inputs/update_icall_promotion_inputs.sh teresajohnson wrote: No because the point of this test is to catch regressions if the separator character (which is hardcoded in the proftext) changes again in the future. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [llvm] [clang] [compiler-rt] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
teresajohnson wrote: > David says the itanium remapper file was only used once during gcc to llvm > transition, so not relevant here. I believe it was actually for the libstdc++ to libc++ transition (see https://reviews.llvm.org/D51247 and https://reviews.llvm.org/D51240). If it is broken we'll at least want to add a FIXME there. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
teresajohnson wrote: > Using the same added ICP test, profile matching on local-linkage > `_ZL7callee0v` using `clang++ -v -fuse-ld=lld -O2 > -fprofile-use=thinlto_icall_prom.profdata ` , as > [this](https://gist.github.com/minglotus-6/11817ba645c6b12cd7116f41bfb1185e) > pgo-instr-use output shows. Can you clarify what you are saying here - is it working or not working? https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -300,12 +316,8 @@ getIRPGONameForGlobalObject(const GlobalObject , GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { -Name.append(FileName.empty() ? "" : FileName); -Name.append(";"); - } Mangler().getNameWithPrefix(Name, , /*CannotUsePrivateLabel=*/true); teresajohnson wrote: > It wasn't broken in general, but it's needed to get -order_file working > correctly. Unfortunately this change broke aspects of ThinLTO ICP, however. Is it possible to change the handling of -order_file in the linker to modify the symbol names appropriately? > To avoid subtle issues when linkage-name is different from mangled names,,I'm > wondering if it warrants a change to use linkage-names (as opposed to mangled > name) in GlobalValue::getGlobalIdentifier in this PR. If the -order_name handling cannot be fixed as I suggested above, then yes, we need some solution to ensure that the hashed symbol names are consistent across PGO and ThinLTO. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -300,12 +316,8 @@ getIRPGONameForGlobalObject(const GlobalObject , GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { -Name.append(FileName.empty() ? "" : FileName); -Name.append(";"); - } Mangler().getNameWithPrefix(Name, , /*CannotUsePrivateLabel=*/true); teresajohnson wrote: Was PGO broken in general before D156569? Or is this specific to `-symbol-ordering-file` and `-order_file`? Also, it looks like some of the main changes here by the mangler for objective C are to strip the "\01" prefix. Is this different than the support to remove the '\1' mangling escape in getGlobalIdentifier()? Trying to figure out how we keep these interfaces in sync to avoid more issues with ICP... https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -300,12 +316,8 @@ getIRPGONameForGlobalObject(const GlobalObject , GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { -Name.append(FileName.empty() ? "" : FileName); -Name.append(";"); - } Mangler().getNameWithPrefix(Name, , /*CannotUsePrivateLabel=*/true); teresajohnson wrote: Can you give an example of where this is called such that there is a difference between the function name and linkage name (at least for c++ these are the same afaik), and what that difference looks like? Are there specific callsites used for the symbol ordering support that need the mangling? https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -246,11 +246,27 @@ std::string InstrProfError::message() const { char InstrProfError::ID = 0; -std::string getPGOFuncName(StringRef RawFuncName, - GlobalValue::LinkageTypes Linkage, +std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, teresajohnson wrote: I guess it depends on whether it is intentional that Clang (and Swift apparently?) use the old name. @ellishg was there a reason to leave that as is? If they must use the old one then maybe getFEPGOFuncName. If they should be transitioned eventually then using "Legacy" makes sense. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -246,11 +246,27 @@ std::string InstrProfError::message() const { char InstrProfError::ID = 0; -std::string getPGOFuncName(StringRef RawFuncName, - GlobalValue::LinkageTypes Linkage, +std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, teresajohnson wrote: Oh I missed the fact that one getPGOFuncName interface was left. I am only seeing 2 invocations of that interface outside of the unittest test. I would be in favor of doing renames of both getPGOFuncName interfaces in one patch. A separate NFC patch is a fine option, and keeps this patch just about fixing the ICP breakage caused by the delimiter change. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -300,12 +316,8 @@ getIRPGONameForGlobalObject(const GlobalObject , GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { -Name.append(FileName.empty() ? "" : FileName); -Name.append(";"); - } Mangler().getNameWithPrefix(Name, , /*CannotUsePrivateLabel=*/true); teresajohnson wrote: What I'm confused about is that the function name is already the mangled name, at least using clang with c++. Is it not for objective C? https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -246,11 +246,27 @@ std::string InstrProfError::message() const { char InstrProfError::ID = 0; -std::string getPGOFuncName(StringRef RawFuncName, - GlobalValue::LinkageTypes Linkage, +std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, teresajohnson wrote: Is this resolved? It looks like the most recent version of the patch changes getPGOFuncName to getLegacyPGOFuncName for the old format, and uses getIRPGOFuncName for the new format. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [PGO][GlobalValue][LTO]In GlobalValues::getGlobalIdentifier, use semicolon as delimiter for local-linkage varibles. (PR #74008)
@@ -300,12 +316,8 @@ getIRPGONameForGlobalObject(const GlobalObject , GlobalValue::LinkageTypes Linkage, StringRef FileName) { SmallString<64> Name; - if (llvm::GlobalValue::isLocalLinkage(Linkage)) { -Name.append(FileName.empty() ? "" : FileName); -Name.append(";"); - } Mangler().getNameWithPrefix(Name, , /*CannotUsePrivateLabel=*/true); teresajohnson wrote: @ellishg How much of D156569 relied on the invocation of Mangler? It is not mentioned in the patch description, only the rationale for changing ":" to ";". The problem is if these are out of sync, then cross-module importing of indirectly called local functions will continue to be broken in whatever cases Mangler().getNameWithPrefix affects. https://github.com/llvm/llvm-project/pull/74008 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [MemProf] Expand optimization scope to internal linakge function (PR #73236)
teresajohnson wrote: @snehasish and I chatted more about this offline. Using the dwarf info to figure out the right source name prefix during indexing is not straightforward. The source file name used as the prefix in the compiler for the IRPGOName is that of the TU. The source file of the line in the dwarf info may be a header. It could be possible to walk up the profiled inline frames to find a source file, but that is error prone in certain circumstances (e.g. if the profiled binary had thinlto cross module inlining, and if it isn't obvious which source file name is a non-included TU). I think your change to the matching is almost strictly better than the complete non-matching we get currently for local functions. It would even work in many cases without uniq suffixes, but uniq suffixes will make that work even better. So let's go ahead with that change. However, we'd like to request that you split the __cxx_global_var_init change into a separate patch, as it is somewhat orthogonal to the matching change, and in case there are unexpected issues with that change. Can you split that out and make the test case here for matching of a non __cxx_global_var_init function? Thanks! https://github.com/llvm/llvm-project/pull/73236 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [MemProf] Expand optimization scope to internal linakge function (PR #73236)
teresajohnson wrote: > > Yes, you're right. As an alternative can we use the symbol table and find > > Bind = LOCAL to add the prefix before hashing? > > If we choose this method. I think we can't deal with the situation which one > symbol is not local linkage type in thin compile, but will be converted to > local linkage after thin backend by thinlto's internalization. In this > situation function name in llvm-profdata will have prefix with file name. But > when llvm consumes memory profile, PGOFuncName won't return function name > with prefix. If I understand the issue you are describing, that would only occur if the instrumentation build also used ThinLTO. Is that a typical use case for you? https://github.com/llvm/llvm-project/pull/73236 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -1530,14 +1530,11 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, } ModulePassManager -PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, -bool EmitSummary) { +PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level) { ModulePassManager MPM; - MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary, - ThinLTO - ? buildThinLTOPreLinkDefaultPipeline(Level) - : buildLTOPreLinkDefaultPipeline(Level))); - MPM.addPass(buildPerModuleDefaultPipeline(Level)); + MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level)); + MPM.addPass(EmbedBitcodePass()); + MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); teresajohnson wrote: What was the downside of using ThinLTODefaultPipeline always? I guess it was essentially over-optimizing in the non-LTO case? I guess using the ThinLTODefaultPipeline only under SamplePGO is ok with me, although it seems like over time as the pipelines get modified it will be difficult to ensure that is the only case where the pipelines get out of sync. I think in either case we are essentially saying: if you use Fat LTO then don't expect the resulting non-LTO native code to be the same as that of a fully non-LTO compile. In one case there is more optimization, in the other there is the risk of future deoptimization if things don't stay in sync. https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
https://github.com/teresajohnson approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -1861,6 +1861,13 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions , ArgList , if (Args.hasArg(OPT_funified_lto)) Opts.PrepareForThinLTO = true; } + if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects, + options::OPT_fno_fat_lto_objects)) { +if (!Args.hasArg(OPT_funified_lto)) + Diags.Report(diag::err_drv_incompatible_options) teresajohnson wrote: It might be less confusing to users if this error message is only given upon an explicit -fno-unified-lto, and diag::err_drv_argument_only_allowed_with is used for the lack of -funified-lto. Also can you add driver tests to check that we get the expected error(s) in the expected option combinations? https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [MemProf] Expand optimization scope to internal linakge function (PR #73236)
teresajohnson wrote: > @lifengxiang1025 thanks for flagging this issue. I think it's best to not > rely on unique-internal-linkage-name here. Instead we should extend the logic > in RawMemProfReader.cpp to include "filename;" if the function is internal > linkage as expected by IRPGOFuncName. Can you try this approach? I don't think we want to change the name in the frames themselves, as the names in the debug info when matching don't contain this prefix. Although I suppose we could modify the matcher to add the prefixes when matching frames too. I.e. here: https://github.com/llvm/llvm-project/blob/c0fe0719df457b0992606b0f2a235719a5bbfd54/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp#L806 It sounds like the issue is that the names used to index the memprof records do not use the PGOFuncName scheme and therefore we never find any memprof records for an internal linkage function during matching. I.e. the GUID passed to InstrProfWriter::addMemProfRecord. Unfortunately, looking at RawMemProfReader.cpp it does look like that eventually gets populated out of the Function (GUID) field from same Frame setup code. So we'd either need to do some extra handling in that code so that the prefix is only used for the record entry, or change the matcher (the latter may be the easiest). https://github.com/llvm/llvm-project/pull/73236 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; - PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO; teresajohnson wrote: Actually, I believe the cc1 parsing is handled by ParseCodeGenArgs in clang/lib/Frontend/CompilerInvocation.cpp. You can potentially add something in there to ensure UnifiedLTO is set with FatLTO? Or give your error there that UnifiedLTO must be specified with Fat LTO. https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; - PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO; teresajohnson wrote: Ah ok. I guess that would be expected. Whether it is test friendly is another question, but in general the driver does a lot of option set up for the cc1 invocation. https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; - PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO; teresajohnson wrote: Not sure I follow. Isn't the change in Driver/Toolchains/Clang.cpp going to ensure that -funified-lto is also passed to the cc1 invocation, and thus both options should be set in CodeGenOpts during the cc1 invocation? https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][llvm][fatlto] Avoid cloning modules in FatLTO (PR #72180)
@@ -810,7 +810,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; - PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO || CodeGenOpts.FatLTO; teresajohnson wrote: Won't your change to lib/Driver/ToolChains/Clang.cpp below mean that CodeGenOpts.UnifiedLTO will always be set with FatLTO? Do we thus need this change or the one further below for the module flag (and maybe add an assert that UnifiedLTO is set of FatLTO is set)? https://github.com/llvm/llvm-project/pull/72180 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [MemProf] Expand optimization scope to internal linakge function (PR #73236)
teresajohnson wrote: @snehasish can you take a look at the issue described here? Should we be doing something different in llvm-profdata? https://github.com/llvm/llvm-project/pull/73236 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Support VFE in thinLTO (PR #69735)
@@ -338,12 +574,33 @@ PreservedAnalyses GlobalDCEPass::run(Module , ModuleAnalysisManager ) { // The second pass drops the bodies of functions which are dead... std::vector DeadFunctions; - for (Function : M) + std::set DeadFunctionsSet; + auto funcRemovedInIndex = [&](Function *F) -> bool { +if (!ImportSummary) + return false; +auto = ImportSummary->funcsToBeRemoved(); +// If function is internalized, its current GUID will be different +// from the GUID in funcsToBeRemoved. Treat it as a global and search +// again. +if (vfuncsRemoved.count(F->getGUID())) + return true; +if (vfuncsRemoved.count(GlobalValue::getGUID(F->getName( teresajohnson wrote: These lookups are a little dangerous as I think there are cases where we might rename and it wouldn't find the right function. For internalization we tag the function with an attribute before any thinlto renaming, linkage changes, or other optimization passes. See https://github.com/llvm/llvm-project/blob/0936a718492d248a726b8e8d4529e4aa194bc8e9/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp#L271. https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Support VFE in thinLTO (PR #69735)
@@ -34,12 +40,223 @@ static cl::opt ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); +static cl::opt ClReadSummary( +"globaldce-read-summary", +cl::desc("Read summary from given bitcode before running pass"), +cl::Hidden); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs,"Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); STATISTIC(NumVFuncs,"Number of virtual functions removed"); +namespace llvm { + +// Returning a representative summary for the vtable, also set isSafe. +static const GlobalVarSummary * +getVTableFuncsForTId(const TypeIdOffsetVtableInfo , bool ) { + // Find a representative copy of the vtable initializer. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto : P.VTableVI.getSummaryList()) { +if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) { +isSafe = false; +return nullptr; + } + LocalFound = true; +} +auto *CurVS = cast(S->getBaseObject()); +// Ignore if vTableFuncs is empty and vtable is available_externally. +if (!CurVS->vTableFuncs().empty() || +!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { + VS = CurVS; + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) { +isSafe = false; +return VS; + } +} + } + + if (!VS) { +isSafe = false; +return nullptr; + } + if (!VS->isLive()) { +isSafe = true; +return nullptr; + } + isSafe = true; + return VS; +} + +static void collectSafeVTables( +ModuleSummaryIndex , +DenseMap> , +std::map> ) { + // Update VFESafeVTablesAndFns with information from summary. + for (auto : Summary.typeIdCompatibleVtableMap()) { +NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); +LLVM_DEBUG(dbgs() << "TId " << GlobalValue::getGUID(P.first) << " " + << P.first << "\n"); + } + llvm::errs() << "VFEThinLTO number of TIds: " << NameByGUID.size() << "\n"; + + // VFESafeVTablesAndFns: map from VI for vTable to VI for vfunc + std::map> vFuncSet; + unsigned numSafeVFuncs = 0; + // Collect stats for VTables (safe, public-visibility, other). + std::set vTablePublicVis; + std::set vTableOther; + for (auto : Summary.typeIdCompatibleVtableMap()) { +for (const TypeIdOffsetVtableInfo : TidSummary.second) { + LLVM_DEBUG(dbgs() << "TId-vTable " << TidSummary.first << " " +<< P.VTableVI.name() << " " << P.AddressPointOffset +<< "\n"); + bool isSafe = false; + const GlobalVarSummary *VS = getVTableFuncsForTId(P, isSafe); + if (!isSafe && VS) +vTablePublicVis.insert(P.VTableVI); + if ((isSafe && !VS) || (!isSafe && !VS)) +vTableOther.insert(P.VTableVI); + if (!isSafe || !VS) { +continue; + } + + // Go through VS->vTableFuncs + for (auto VTP : VS->vTableFuncs()) { +if (vFuncSet.find(P.VTableVI) == vFuncSet.end() || +!vFuncSet[P.VTableVI].count(VTP.FuncVI.getGUID())) { + VFESafeVTablesAndFns[P.VTableVI].push_back(VTP); + LLVM_DEBUG(dbgs() + << "vTable " << P.VTableVI.name() << " " + << VTP.FuncVI.name() << " " << VTP.VTableOffset << "\n"); + ++numSafeVFuncs; +} +vFuncSet[P.VTableVI].insert(VTP.FuncVI.getGUID()); + } +} + } + llvm::errs() << "VFEThinLTO number of vTables: " << vFuncSet.size() << " " + << vTablePublicVis.size() << " " << vTableOther.size() << "\n"; + llvm::errs() << "VFEThinLTO numSafeVFuncs: " << numSafeVFuncs << "\n"; +} + +static void checkVTableLoadsIndex( +ModuleSummaryIndex , +DenseMap> , +std::map> , +std::map> ) { + // Go through Function summarys for intrinsics, also funcHasNonVTableRef to + // erase entries from VFESafeVTableAndFns. teresajohnson wrote: why and when are entries being erased? More qualitative comments about what this algorithm is doing would be helpful. https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
@@ -1362,6 +1362,8 @@ class ModuleSummaryIndex { // Temporary map while building StackIds list. Clear when index is completely // built via releaseTemporaryMemory. std::map StackIdToIndex; + std::set FuncsWithNonVtableRef; teresajohnson wrote: Instead of sets of GUIDs, it would probably be more efficient to represent, and to access, to just add new function flags (FFlags in the FunctionSummary). https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Support VFE in thinLTO (PR #69735)
@@ -34,12 +40,223 @@ static cl::opt ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); +static cl::opt ClReadSummary( +"globaldce-read-summary", +cl::desc("Read summary from given bitcode before running pass"), +cl::Hidden); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs,"Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); STATISTIC(NumVFuncs,"Number of virtual functions removed"); +namespace llvm { + +// Returning a representative summary for the vtable, also set isSafe. +static const GlobalVarSummary * +getVTableFuncsForTId(const TypeIdOffsetVtableInfo , bool ) { + // Find a representative copy of the vtable initializer. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto : P.VTableVI.getSummaryList()) { +if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) { +isSafe = false; +return nullptr; + } + LocalFound = true; +} +auto *CurVS = cast(S->getBaseObject()); +// Ignore if vTableFuncs is empty and vtable is available_externally. +if (!CurVS->vTableFuncs().empty() || +!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { + VS = CurVS; + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) { +isSafe = false; +return VS; + } +} + } + + if (!VS) { +isSafe = false; +return nullptr; + } + if (!VS->isLive()) { +isSafe = true; +return nullptr; + } + isSafe = true; + return VS; +} + +static void collectSafeVTables( +ModuleSummaryIndex , +DenseMap> , +std::map> ) { + // Update VFESafeVTablesAndFns with information from summary. + for (auto : Summary.typeIdCompatibleVtableMap()) { +NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); +LLVM_DEBUG(dbgs() << "TId " << GlobalValue::getGUID(P.first) << " " + << P.first << "\n"); + } + llvm::errs() << "VFEThinLTO number of TIds: " << NameByGUID.size() << "\n"; + + // VFESafeVTablesAndFns: map from VI for vTable to VI for vfunc + std::map> vFuncSet; + unsigned numSafeVFuncs = 0; + // Collect stats for VTables (safe, public-visibility, other). + std::set vTablePublicVis; + std::set vTableOther; + for (auto : Summary.typeIdCompatibleVtableMap()) { +for (const TypeIdOffsetVtableInfo : TidSummary.second) { + LLVM_DEBUG(dbgs() << "TId-vTable " << TidSummary.first << " " +<< P.VTableVI.name() << " " << P.AddressPointOffset +<< "\n"); + bool isSafe = false; + const GlobalVarSummary *VS = getVTableFuncsForTId(P, isSafe); + if (!isSafe && VS) +vTablePublicVis.insert(P.VTableVI); + if ((isSafe && !VS) || (!isSafe && !VS)) +vTableOther.insert(P.VTableVI); + if (!isSafe || !VS) { +continue; + } + + // Go through VS->vTableFuncs teresajohnson wrote: Can you expand on the comment - what is this code doing and why? https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
@@ -34,12 +40,223 @@ static cl::opt ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); +static cl::opt ClReadSummary( +"globaldce-read-summary", +cl::desc("Read summary from given bitcode before running pass"), +cl::Hidden); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs,"Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); STATISTIC(NumVFuncs,"Number of virtual functions removed"); +namespace llvm { + +// Returning a representative summary for the vtable, also set isSafe. +static const GlobalVarSummary * +getVTableFuncsForTId(const TypeIdOffsetVtableInfo , bool ) { + // Find a representative copy of the vtable initializer. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto : P.VTableVI.getSummaryList()) { +if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) { +isSafe = false; +return nullptr; + } + LocalFound = true; +} +auto *CurVS = cast(S->getBaseObject()); +// Ignore if vTableFuncs is empty and vtable is available_externally. +if (!CurVS->vTableFuncs().empty() || +!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { + VS = CurVS; + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) { +isSafe = false; +return VS; + } +} + } + + if (!VS) { +isSafe = false; +return nullptr; + } + if (!VS->isLive()) { +isSafe = true; +return nullptr; + } + isSafe = true; + return VS; +} + +static void collectSafeVTables( +ModuleSummaryIndex , +DenseMap> , +std::map> ) { + // Update VFESafeVTablesAndFns with information from summary. + for (auto : Summary.typeIdCompatibleVtableMap()) { +NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); +LLVM_DEBUG(dbgs() << "TId " << GlobalValue::getGUID(P.first) << " " + << P.first << "\n"); + } + llvm::errs() << "VFEThinLTO number of TIds: " << NameByGUID.size() << "\n"; + + // VFESafeVTablesAndFns: map from VI for vTable to VI for vfunc teresajohnson wrote: This comment seems to be in the wrong place, can you add a description of the below data structure (and describe VFESafeVTablesAndFns somewhere too, maybe where it is defined?) https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Support VFE in thinLTO (PR #69735)
@@ -775,6 +783,58 @@ static void computeVariableSummary(ModuleSummaryIndex , Index.addGlobalValueSummary(V, std::move(GVarSummary)); } +static void ComputeDependencies( teresajohnson wrote: nit: function should be lowerCamelCase https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
@@ -34,12 +40,223 @@ static cl::opt ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); +static cl::opt ClReadSummary( +"globaldce-read-summary", +cl::desc("Read summary from given bitcode before running pass"), +cl::Hidden); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs,"Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); STATISTIC(NumVFuncs,"Number of virtual functions removed"); +namespace llvm { + +// Returning a representative summary for the vtable, also set isSafe. +static const GlobalVarSummary * +getVTableFuncsForTId(const TypeIdOffsetVtableInfo , bool ) { + // Find a representative copy of the vtable initializer. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto : P.VTableVI.getSummaryList()) { +if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) { +isSafe = false; +return nullptr; + } + LocalFound = true; +} +auto *CurVS = cast(S->getBaseObject()); +// Ignore if vTableFuncs is empty and vtable is available_externally. +if (!CurVS->vTableFuncs().empty() || +!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { + VS = CurVS; + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) { +isSafe = false; +return VS; + } +} + } + + if (!VS) { +isSafe = false; +return nullptr; + } + if (!VS->isLive()) { +isSafe = true; +return nullptr; + } + isSafe = true; + return VS; +} + +static void collectSafeVTables( +ModuleSummaryIndex , +DenseMap> , +std::map> ) { + // Update VFESafeVTablesAndFns with information from summary. + for (auto : Summary.typeIdCompatibleVtableMap()) { +NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); +LLVM_DEBUG(dbgs() << "TId " << GlobalValue::getGUID(P.first) << " " + << P.first << "\n"); + } + llvm::errs() << "VFEThinLTO number of TIds: " << NameByGUID.size() << "\n"; + + // VFESafeVTablesAndFns: map from VI for vTable to VI for vfunc + std::map> vFuncSet; + unsigned numSafeVFuncs = 0; + // Collect stats for VTables (safe, public-visibility, other). + std::set vTablePublicVis; + std::set vTableOther; + for (auto : Summary.typeIdCompatibleVtableMap()) { +for (const TypeIdOffsetVtableInfo : TidSummary.second) { + LLVM_DEBUG(dbgs() << "TId-vTable " << TidSummary.first << " " +<< P.VTableVI.name() << " " << P.AddressPointOffset +<< "\n"); + bool isSafe = false; + const GlobalVarSummary *VS = getVTableFuncsForTId(P, isSafe); + if (!isSafe && VS) +vTablePublicVis.insert(P.VTableVI); + if ((isSafe && !VS) || (!isSafe && !VS)) +vTableOther.insert(P.VTableVI); + if (!isSafe || !VS) { +continue; + } + + // Go through VS->vTableFuncs + for (auto VTP : VS->vTableFuncs()) { +if (vFuncSet.find(P.VTableVI) == vFuncSet.end() || +!vFuncSet[P.VTableVI].count(VTP.FuncVI.getGUID())) { + VFESafeVTablesAndFns[P.VTableVI].push_back(VTP); + LLVM_DEBUG(dbgs() + << "vTable " << P.VTableVI.name() << " " + << VTP.FuncVI.name() << " " << VTP.VTableOffset << "\n"); + ++numSafeVFuncs; +} +vFuncSet[P.VTableVI].insert(VTP.FuncVI.getGUID()); + } +} + } + llvm::errs() << "VFEThinLTO number of vTables: " << vFuncSet.size() << " " + << vTablePublicVis.size() << " " << vTableOther.size() << "\n"; + llvm::errs() << "VFEThinLTO numSafeVFuncs: " << numSafeVFuncs << "\n"; +} + +static void checkVTableLoadsIndex( +ModuleSummaryIndex , +DenseMap> , +std::map> , +std::map> ) { + // Go through Function summarys for intrinsics, also funcHasNonVTableRef to + // erase entries from VFESafeVTableAndFns. + for (auto : Summary) { +for (auto : PI.second.SummaryList) { + auto *FS = dyn_cast(S.get()); + if (!FS) +continue; + // We should ignore Tid if there is a type.checked.load with Offset not + // ConstantInt. Currently ModuleSummaryAnalysis will update TypeTests. + for (GlobalValue::GUID G : FS->type_tests()) { +if (NameByGUID.find(G) == NameByGUID.end()) + continue; +auto TidSummary = +Summary.getTypeIdCompatibleVtableSummary(NameByGUID[G][0]); +for (const TypeIdOffsetVtableInfo : *TidSummary) { + LLVM_DEBUG(dbgs() << "unsafe-vtable due to type_tests: " +<< P.VTableVI.name() << "\n"); + VFESafeVTablesAndFns.erase(P.VTableVI); +} + } + // Go through vTableFuncs to find the potential callees + auto CheckVLoad
[llvm] [clang] Support VFE in thinLTO (PR #69735)
@@ -34,12 +40,223 @@ static cl::opt ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); +static cl::opt ClReadSummary( +"globaldce-read-summary", +cl::desc("Read summary from given bitcode before running pass"), +cl::Hidden); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs,"Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); STATISTIC(NumVFuncs,"Number of virtual functions removed"); +namespace llvm { + +// Returning a representative summary for the vtable, also set isSafe. +static const GlobalVarSummary * +getVTableFuncsForTId(const TypeIdOffsetVtableInfo , bool ) { + // Find a representative copy of the vtable initializer. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto : P.VTableVI.getSummaryList()) { +if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) { +isSafe = false; +return nullptr; + } + LocalFound = true; +} +auto *CurVS = cast(S->getBaseObject()); +// Ignore if vTableFuncs is empty and vtable is available_externally. +if (!CurVS->vTableFuncs().empty() || +!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { + VS = CurVS; + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) { +isSafe = false; +return VS; + } +} + } + + if (!VS) { +isSafe = false; +return nullptr; + } + if (!VS->isLive()) { +isSafe = true; +return nullptr; + } + isSafe = true; + return VS; +} + +static void collectSafeVTables( +ModuleSummaryIndex , +DenseMap> , +std::map> ) { + // Update VFESafeVTablesAndFns with information from summary. + for (auto : Summary.typeIdCompatibleVtableMap()) { +NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); +LLVM_DEBUG(dbgs() << "TId " << GlobalValue::getGUID(P.first) << " " + << P.first << "\n"); + } + llvm::errs() << "VFEThinLTO number of TIds: " << NameByGUID.size() << "\n"; + + // VFESafeVTablesAndFns: map from VI for vTable to VI for vfunc + std::map> vFuncSet; + unsigned numSafeVFuncs = 0; + // Collect stats for VTables (safe, public-visibility, other). + std::set vTablePublicVis; + std::set vTableOther; + for (auto : Summary.typeIdCompatibleVtableMap()) { +for (const TypeIdOffsetVtableInfo : TidSummary.second) { + LLVM_DEBUG(dbgs() << "TId-vTable " << TidSummary.first << " " +<< P.VTableVI.name() << " " << P.AddressPointOffset +<< "\n"); + bool isSafe = false; + const GlobalVarSummary *VS = getVTableFuncsForTId(P, isSafe); + if (!isSafe && VS) +vTablePublicVis.insert(P.VTableVI); + if ((isSafe && !VS) || (!isSafe && !VS)) +vTableOther.insert(P.VTableVI); + if (!isSafe || !VS) { +continue; + } + + // Go through VS->vTableFuncs + for (auto VTP : VS->vTableFuncs()) { +if (vFuncSet.find(P.VTableVI) == vFuncSet.end() || +!vFuncSet[P.VTableVI].count(VTP.FuncVI.getGUID())) { + VFESafeVTablesAndFns[P.VTableVI].push_back(VTP); + LLVM_DEBUG(dbgs() + << "vTable " << P.VTableVI.name() << " " + << VTP.FuncVI.name() << " " << VTP.VTableOffset << "\n"); + ++numSafeVFuncs; +} +vFuncSet[P.VTableVI].insert(VTP.FuncVI.getGUID()); + } +} + } + llvm::errs() << "VFEThinLTO number of vTables: " << vFuncSet.size() << " " teresajohnson wrote: Should these be optimization remarks or debug messages? https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
@@ -1362,6 +1362,8 @@ class ModuleSummaryIndex { // Temporary map while building StackIds list. Clear when index is completely // built via releaseTemporaryMemory. std::map StackIdToIndex; + std::set FuncsWithNonVtableRef; + std::set VFuncsToBeRemoved; // no need to serialize teresajohnson wrote: It would need to be serialized for distributed thinlto. A better thing would be to describe which of these are produced by the per-module analysis and used by the thin link, vs produced by the thin link and consumed by the ThinLTO backends. However, I suggest making these flags as noted above (but those should be documented with this information too) https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
https://github.com/teresajohnson commented: Thanks for the patch! I didn't go through in too much detail yet, but have some mostly higher level comments and suggestions. https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Support VFE in thinLTO (PR #69735)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/69735 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
https://github.com/teresajohnson approved this pull request. Somehow I missed those other uses of the ITANIUM prefix. Thanks for cleaning it up though I think the new names are more descriptive. https://github.com/llvm/llvm-project/pull/70841 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
@@ -101,14 +119,16 @@ // ITANIUM-OPT-SAME: !type [[EF16:![0-9]+]] // ITANIUM-OPT: @llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTVN5test31EE] -// MS: comdat($"??_7A@@6B@"), !type [[A8:![0-9]+]] -// MS: comdat($"??_7B@@6B0@@"), !type [[B8:![0-9]+]] -// MS: comdat($"??_7B@@6BA@@@"), !type [[A8]] -// MS: comdat($"??_7C@@6B@"), !type [[A8]] -// MS: comdat($"??_7D@?A0x{{[^@]*}}@@6BB@@@"), !type [[B8]], !type [[D8:![0-9]+]] -// MS: comdat($"??_7D@?A0x{{[^@]*}}@@6BA@@@"), !type [[A8]] -// MS: comdat($"??_7FA@?1??foo@@YAXXZ@6B@"), !type [[A8]], !type [[FA8:![0-9]+]] +// MS-TYPEMETADATA: comdat($"??_7A@@6B@"), !type [[A8:![0-9]+]] +// MS-TYPEMETADATA: comdat($"??_7B@@6B0@@"), !type [[B8:![0-9]+]] +// MS-TYPEMETADATA: comdat($"??_7B@@6BA@@@"), !type [[A8]] +// MS-TYPEMETADATA: comdat($"??_7C@@6B@"), !type [[A8]] +// MS-TYPEMETADATA: comdat($"??_7D@?A0x{{[^@]*}}@@6BB@@@"), !type [[B8]], !type [[D8:![0-9]+]] +// MS-TYPEMETADATA: comdat($"??_7D@?A0x{{[^@]*}}@@6BA@@@"), !type [[A8]] +// MS-TYPEMETADATA: comdat($"??_7FA@?1??foo@@YAXXZ@6B@"), !type [[A8]], !type [[FA8:![0-9]+]] +// Test !type doesn't exist in the generated IR. +// NOTYPEMD-NOT: !type teresajohnson wrote: Instead of this do an --implicit-check-not='!type' on the FileCheck invocations https://github.com/llvm/llvm-project/pull/70841 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
@@ -1,98 +1,116 @@ + // Tests for the cfi-vcall feature: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=NDIAG %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-ABORT %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-recover=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-RECOVER %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=MS --check-prefix=TT-MS --check-prefix=NDIAG %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=NDIAG %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-ABORT %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-recover=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-RECOVER %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS --check-prefix=NDIAG %s // Tests for the whole-program-vtables feature: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM-DEFAULT %s // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s // Tests for cfi + whole-program-vtables: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-MD %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
@@ -1312,7 +1312,7 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout ) { - if (!getCodeGenOpts().LTOUnit) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) teresajohnson wrote: Add a comment here and in the other file noting why it is added for IR instrumentation. https://github.com/llvm/llvm-project/pull/70841 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
@@ -1,98 +1,116 @@ + // Tests for the cfi-vcall feature: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=NDIAG %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-ABORT %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-recover=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-RECOVER %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=MS --check-prefix=TT-MS --check-prefix=NDIAG %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=NDIAG %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-ABORT %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-recover=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN --check-prefix=ITANIUM-MD-DIAG --check-prefix=ITANIUM-DIAG --check-prefix=DIAG --check-prefix=DIAG-RECOVER %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-NVT-NO-RV --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS --check-prefix=NDIAG %s // Tests for the whole-program-vtables feature: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=ITANIUM-TYPEMETADATA --check-prefix=ITANIUM-MD --check-prefix=TT-ITANIUM-HIDDEN %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM-DEFAULT %s // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s // Tests for cfi + whole-program-vtables: -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-MD %s -// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS
[clang] [Clang] Emit type metadata on vtables when IRPGO instrumentation option is on. (PR #70841)
@@ -0,0 +1,145 @@ +// Test that type metadata are emitted with -fprofile-generate +// +// RUN: %clang -fprofile-generate -fno-lto -target x86_64-unknown-linux -emit-llvm -S %s -o - | FileCheck %s --check-prefix=ITANIUM +// RUN: %clang -fprofile-generate -fno-lto -target x86_64-pc-windows-msvc -emit-llvm -S %s -o - | FileCheck %s --check-prefix=MS teresajohnson wrote: Yes I think that would be more typical and better imo (i.e. use %clang_cc to test) https://github.com/llvm/llvm-project/pull/70841 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -78,7 +78,11 @@ static int GetCpuId(void) { // will seg fault as the address of __vdso_getcpu will be null. if (!memprof_inited) return -1; +#if SANITIZER_APPLE + return 0; teresajohnson wrote: If there is a way to do this on Apple then add a FIXME? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -0,0 +1,83 @@ +// UNSUPPORTED: ios + +// RUN: %clangxx_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=print_binary_refs=true:print_text=true:log_path=stdout:verbosity=2 %run %t &> %t.log +// RUN: llvm-nm %t &> %t2.log +// RUN: cat %t2.log %t.log | FileCheck %s + +#include +struct __attribute__((visibility("default"))) A { + virtual void foo() {} +}; + +void test_1(A *p) { + // A has default visibility, so no need for type.checked.load. + p->foo(); +} + +struct __attribute__((visibility("hidden"))) +[[clang::lto_visibility_public]] B { + virtual void foo() {} +}; + +void test_2(B *p) { + // B has public LTO visibility, so no need for type.checked.load. + p->foo(); +} + +struct __attribute__((visibility("hidden"))) C { + virtual void foo() {} + virtual void bar() {} +}; + +void test_3(C *p) { + // C has hidden visibility, so we generate type.checked.load to allow VFE. + p->foo(); +} + +void test_4(C *p) { + // When using type.checked.load, we pass the vtable offset to the intrinsic, + // rather than adding it to the pointer with a GEP. + p->bar(); +} + +void test_5(C *p, void (C::*q)(void)) { + // We also use type.checked.load for the virtual side of member function + // pointer calls. We use a GEP to calculate the address to load from and pass + // 0 as the offset to the intrinsic, because we know that the load must be + // from exactly the point marked by one of the function-type metadatas (in + // this case "_ZTSM1CFvvE.virtual"). If we passed the offset from the member + // function pointer to the intrinsic, this information would be lost. No + // codegen changes on the non-virtual side. + (p->*q)(); +} +int main() { + C *p = new C; + test_3(p); + test_4(p); + //__memprof_profile_dump(); // dump accesses to p, no dumping to access of the vtable + A *a = new A; + test_1(a); + B *b = new B; + test_2(b); + __sanitizer_set_report_path("stdout"); + //__memprof_profile_dump(); + //__sanitizer_set_report_path(nullptr); + return 0; // at exit, dump accesses to a, b +} +// CHECK: __ZTV1A +// CHECK: __ZTV1B +// 5 sections corresponding to xxx xxx __got __mod_init_func __const +// vtables are in __const +// CHECK: BuildIdName:{{.*}}memprof-vtable teresajohnson wrote: Where is the memprof-vtable module name coming from? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -747,8 +749,10 @@ endif() if (OS_NAME MATCHES "Linux|FreeBSD|Windows|NetBSD|SunOS") set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME TRUE) + set(COMPILER_RT_MEMPROF_HAS_STATIC_RUNTIME TRUE) else() set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME FALSE) + set(COMPILER_RT_MEMPROF_HAS_STATIC_RUNTIME TRUE) teresajohnson wrote: Is this meant to be FALSE? Otherwise this new variable is always TRUE. https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -279,13 +286,54 @@ struct Allocator { Print(Value->mib, Key, bool(Arg)); } + using SegmentEntry = ::llvm::memprof::SegmentEntry; void FinishAndWrite() { if (print_text && common_flags()->print_module_map) DumpProcessMap(); allocator.ForceLock(); InsertLiveBlocks(); +#if SANITIZER_APPLE +if (print_binary_refs) { + __sanitizer::ListOfModules List; + List.init(); + ArrayRef Modules(List.begin(), List.end()); + for (const auto : Modules) { +for (const auto : Module.ranges()) { + if (true) { // Segment.executable) { +SegmentEntry Entry(Segment.beg, Segment.end, Module.base_address()); +// CHECK(Module.uuid_size() <= MEMPROF_BUILDID_MAX_SIZE); teresajohnson wrote: Should this check and the line below it be uncommented? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -0,0 +1,83 @@ +// UNSUPPORTED: ios + +// RUN: %clangxx_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=print_binary_refs=true:print_text=true:log_path=stdout:verbosity=2 %run %t &> %t.log +// RUN: llvm-nm %t &> %t2.log +// RUN: cat %t2.log %t.log | FileCheck %s + +#include +struct __attribute__((visibility("default"))) A { + virtual void foo() {} +}; + +void test_1(A *p) { + // A has default visibility, so no need for type.checked.load. + p->foo(); +} + +struct __attribute__((visibility("hidden"))) +[[clang::lto_visibility_public]] B { + virtual void foo() {} +}; + +void test_2(B *p) { + // B has public LTO visibility, so no need for type.checked.load. + p->foo(); +} + +struct __attribute__((visibility("hidden"))) C { + virtual void foo() {} + virtual void bar() {} +}; + +void test_3(C *p) { + // C has hidden visibility, so we generate type.checked.load to allow VFE. + p->foo(); +} + +void test_4(C *p) { + // When using type.checked.load, we pass the vtable offset to the intrinsic, + // rather than adding it to the pointer with a GEP. + p->bar(); +} + +void test_5(C *p, void (C::*q)(void)) { + // We also use type.checked.load for the virtual side of member function + // pointer calls. We use a GEP to calculate the address to load from and pass + // 0 as the offset to the intrinsic, because we know that the load must be + // from exactly the point marked by one of the function-type metadatas (in + // this case "_ZTSM1CFvvE.virtual"). If we passed the offset from the member + // function pointer to the intrinsic, this information would be lost. No + // codegen changes on the non-virtual side. + (p->*q)(); +} +int main() { + C *p = new C; + test_3(p); + test_4(p); + //__memprof_profile_dump(); // dump accesses to p, no dumping to access of the vtable + A *a = new A; + test_1(a); + B *b = new B; + test_2(b); + __sanitizer_set_report_path("stdout"); + //__memprof_profile_dump(); teresajohnson wrote: Are these commented out lines supposed to be here? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -63,6 +57,23 @@ struct AP64 { // Allocator64 parameters. Deliberately using a short name. template using PrimaryAllocatorASVT = SizeClassAllocator64>; using PrimaryAllocator = PrimaryAllocatorASVT; +#endif teresajohnson wrote: Combine these 2 lines into an "#else" ? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -43,6 +43,7 @@ enum class align_val_t : size_t {}; ReportOutOfMemory(size, ); \ return res; +#if !SANITIZER_APPLE teresajohnson wrote: How do operator new and delete get intercepted on Apple? Also, are OPERATOR_NEW_BODY* and OPERATOR_DELETE_BODY* macros needed if these interceptors are not included - can the whole thing be put under "#if !SANITIZER_APPLE"? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
@@ -279,13 +286,54 @@ struct Allocator { Print(Value->mib, Key, bool(Arg)); } + using SegmentEntry = ::llvm::memprof::SegmentEntry; void FinishAndWrite() { if (print_text && common_flags()->print_module_map) DumpProcessMap(); allocator.ForceLock(); InsertLiveBlocks(); +#if SANITIZER_APPLE teresajohnson wrote: Which part of this handling is Apple-specific? https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
https://github.com/teresajohnson commented: Thanks for sending the patch! I took an initial look through and have some comments/questions. @snehasish may also be interested in taking a look. https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Support MemProf on darwin (PR #69640)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/69640 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [LTO] A static relocation model can override the PIC level wrt treating external address as directly accessible (PR #65512)
teresajohnson wrote: @MaskRay @nickdesaulniers since they authored and reviewed [e018cbf7208](https://github.com/llvm/llvm-project/commit/e018cbf7208b3d34f18997ddee84c66cee32fb1b), respectively https://github.com/llvm/llvm-project/pull/65512 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [IRPGO][ValueProfile] Instrument virtual table address that could be used to do virtual table address comparision for indirect-call-promotion. (PR #66825)
teresajohnson wrote: > > Interesting work! On my side I was thinking about a different approach > > using existing profiling and the whole program devirtualization (WPD) > > framework: > > > > 1. Existing profiling identifies what targets an indirect call goes to > > 2. With WPD information, we can identify if the targets are member function > > 3. If this is a member function unique to a single vtable then we can elide > > the second load and be functionally equivalent > > > > Directly value profiling the vtable captures more opportunities where the > > member function can exist in more than 1 vtable however getting that > > information in sample profiling is trickier which makes the previous > > approach more appealing. > > Thanks for sharing thoughts! > > I read a design doc from @teresajohnson that compares this two options; the > doc points out type instrumentation could tell type distributions more > accurately (obviously with instrumentation runtime overhead) > > For example, `func` could be attributed back to more than one classes in the > following class hierarchy, even if only one class is hot at runtime. IR > instrumentation tells there is only one hot type and could insert one vtable > comparison (and fallback to the original "load func -> call" path). If the > type information is reversely reasoned from virtual functions, additional > comparisons for non-hot types might be inserted in a hot code region. > > ``` > class Base { >public: > virtual void func(); > }; > > class Derived : public Base { >public: >// inherit func() without overriding > }; > ``` Yes there are tradeoffs to doing this purely with whole program class hierarchy analysis vs with profiled type info, and in fact they can be complementary. For example, the profile info can indicate what order to do the vtable comparisons (i.e. descending order of hotness, as we do for vfunc comparisons in current ICP), while WP CHA can be used to determine when no fallback is required. Also, another advantage of doing this with profiling is also that it does not require WP visibility, which may be difficult to guarantee. https://github.com/llvm/llvm-project/pull/66825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 65e57bb - [FunctionImport] Reduce string duplication (NFC)
Author: Teresa Johnson Date: 2023-08-04T14:43:11-07:00 New Revision: 65e57bbed06d55cab7bb64d54891d33ccb2d4159 URL: https://github.com/llvm/llvm-project/commit/65e57bbed06d55cab7bb64d54891d33ccb2d4159 DIFF: https://github.com/llvm/llvm-project/commit/65e57bbed06d55cab7bb64d54891d33ccb2d4159.diff LOG: [FunctionImport] Reduce string duplication (NFC) The import/export maps, and the ModuleToDefinedGVSummaries map, are all indexed by module paths, which are StringRef obtained from the module summary index, which already has a data structure than owns these strings (the ModulePathStringTable). Because these other maps are also StringMap, which makes a copy of the string key, we were keeping multiple extra copies of the module paths, leading to memory overhead. Change these to DenseMap keyed by StringRef, and document that the strings are owned by the index. The only exception is the llvm-link tool which synthesizes an import list from command line options, and I have added a string cache to maintain ownership there. I measured around 5% memory reduction in the thin link of a large binary. Differential Revision: https://reviews.llvm.org/D156580 Added: Modified: clang/lib/CodeGen/BackendUtil.cpp llvm/include/llvm/LTO/LTO.h llvm/include/llvm/Transforms/IPO/FunctionImport.h llvm/lib/LTO/LTO.cpp llvm/lib/LTO/ThinLTOCodeGenerator.cpp llvm/lib/Transforms/IPO/FunctionImport.cpp llvm/tools/llvm-link/llvm-link.cpp Removed: diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cda03d69522d74..e1aa697a7747ff 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1169,7 +1169,7 @@ static void runThinLTOBackend( const clang::TargetOptions , const LangOptions , std::unique_ptr OS, std::string SampleProfile, std::string ProfileRemapping, BackendAction Action) { - StringMap> + DenseMap> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 150b31e3e8e40c..be85c40983475f 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -196,7 +196,7 @@ class InputFile { /// create a ThinBackend using one of the create*ThinBackend() functions below. using ThinBackend = std::function( const Config , ModuleSummaryIndex , -StringMap , +DenseMap , AddStreamFn AddStream, FileCache Cache)>; /// This ThinBackend runs the individual backend jobs in-process. diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 3e4b3eb30e77b2..559418db05933e 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -94,8 +94,11 @@ class FunctionImporter { /// The map contains an entry for every module to import from, the key being /// the module identifier to pass to the ModuleLoader. The value is the set of - /// functions to import. - using ImportMapTy = StringMap; + /// functions to import. The module identifier strings must be owned + /// elsewhere, typically by the in-memory ModuleSummaryIndex the importing + /// decisions are made from (the module path for each summary is owned by the + /// index's module path string table). + using ImportMapTy = DenseMap; /// The set contains an entry for every global value the module exports. using ExportSetTy = DenseSet; @@ -147,13 +150,18 @@ class FunctionImportPass : public PassInfoMixin { /// \p ExportLists contains for each Module the set of globals (GUID) that will /// be imported by another module, or referenced by such a function. I.e. this /// is the set of globals that need to be promoted/renamed appropriately. +/// +/// The module identifier strings that are the keys of the above two maps +/// are owned by the in-memory ModuleSummaryIndex the importing decisions +/// are made from (the module path for each summary is owned by the index's +/// module path string table). void ComputeCrossModuleImport( const ModuleSummaryIndex , -const StringMap , +const DenseMap , function_ref isPrevailing, -StringMap , -StringMap ); +DenseMap , +DenseMap ); /// Compute all the imports for the given module using the Index. /// @@ -225,7 +233,7 @@ bool convertToDeclaration(GlobalValue ); /// stable order for bitcode emission. void gatherImportedSummariesForModule( StringRef ModulePath, -const StringMap , +const DenseMap , const FunctionImporter::ImportMapTy , std::map ); diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index bc8abb751221ce..06cf3f294513c1 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -178,7 +178,7 @@ void llvm::computeLTOCacheKey(
[clang] 546ec64 - Restore "[MemProf] Use new option/pass for profile feedback and matching"
Author: Teresa Johnson Date: 2023-07-11T13:16:20-07:00 New Revision: 546ec641b4b1bbbf9e66a53983b635fe85d365e6 URL: https://github.com/llvm/llvm-project/commit/546ec641b4b1bbbf9e66a53983b635fe85d365e6 DIFF: https://github.com/llvm/llvm-project/commit/546ec641b4b1bbbf9e66a53983b635fe85d365e6.diff LOG: Restore "[MemProf] Use new option/pass for profile feedback and matching" This restores commit b4a82b62258c5f650a1cccf5b179933e6bae4867, reverted in 3ab7ef28eebf9019eb3d3c4efd7ebfd160106bb1 because it was thought to cause a bot failure, which ended up being unrelated to this patch set. Differential Revision: https://reviews.llvm.org/D154856 Added: Modified: clang/include/clang/Basic/CodeGenOptions.h clang/include/clang/Driver/Options.td clang/lib/CodeGen/BackendUtil.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/test/CodeGen/memprof.cpp clang/test/Driver/fmemprof.cpp llvm/include/llvm/Support/PGOOptions.h llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h llvm/lib/LTO/LTOBackend.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassBuilderPipelines.cpp llvm/lib/Passes/PassRegistry.def llvm/lib/Support/PGOOptions.cpp llvm/lib/Transforms/Instrumentation/MemProfiler.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp llvm/test/Transforms/PGOProfile/memprof.ll llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll llvm/tools/opt/NewPMDriver.cpp Removed: diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index b0f22411e1ad20..fadfff48582eea 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -282,6 +282,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Name of the profile file to use as output for with -fmemory-profile. std::string MemoryProfileOutput; + /// Name of the profile file to use as input for -fmemory-profile-use. + std::string MemoryProfileUsePath; + /// Name of the profile file to use as input for -fprofile-instr-use std::string ProfileInstrumentUsePath; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a34eab4064997a..c5230d11baeddf 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1772,6 +1772,10 @@ defm memory_profile : OptInCC1FFlag<"memory-profile", "Enable", "Disable", " hea def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Enable heap memory profiling and dump results into ">; +def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, +Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, +HelpText<"Use memory profile for profile-guided memory optimization">, +MarshallingInfoString>; // Begin sanitizer flags. These should all be core options exposed in all driver // modes. diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 458756509b3a3d..06af08023d1be9 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -762,31 +762,37 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PGOOpt = PGOOptions( CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() : CodeGenOpts.InstrProfileOutput, -"", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction, -CodeGenOpts.DebugInfoForProfiling); +"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr, +PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse : PGOOptions::NoCSAction; -PGOOpt = -PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse, - CSAction, CodeGenOpts.DebugInfoForProfiling); +PGOOpt = PGOOptions( +CodeGenOpts.ProfileInstrumentUsePath, "", +CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS, +PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling); } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use PGOOpt = PGOOptions( CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile, -VFS, PGOOptions::SampleUse, PGOOptions::NoCSAction, -CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling); +CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse, +PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, +CodeGenOpts.PseudoProbeForProfiling); +
[clang] b4a82b6 - [MemProf] Use new option/pass for profile feedback and matching
Author: Teresa Johnson Date: 2023-07-10T16:42:56-07:00 New Revision: b4a82b62258c5f650a1cccf5b179933e6bae4867 URL: https://github.com/llvm/llvm-project/commit/b4a82b62258c5f650a1cccf5b179933e6bae4867 DIFF: https://github.com/llvm/llvm-project/commit/b4a82b62258c5f650a1cccf5b179933e6bae4867.diff LOG: [MemProf] Use new option/pass for profile feedback and matching Previously the MemProf profile was expected to be in the same profile file as a normal PGO profile, passed via the usual -fprofile-use= option, and was matched in the same pass. To simplify profile preparation, since the raw MemProf profile requires the binary for symbolization and may be simpler to index separately from the raw PGO profile, and also to enable providing a MemProf profile for a SamplePGO build, separate out the MemProf feedback option and matching pass. This patch adds the -fmemory-profile-use=${file} option, and the provided file is passed down to LLVM and ultimately used in a new MemProfUsePass which performs the matching of just the memory profile contents of that file. Note that a single profile file containing both normal PGO and MemProf profile data is still supported, and the relevant profile data is matched by the appropriate matching pass(es) based on which option(s) the profile is provided with (the same profile file can be supplied to both feedback options). Differential Revision: https://reviews.llvm.org/D154856 Added: Modified: clang/include/clang/Basic/CodeGenOptions.h clang/include/clang/Driver/Options.td clang/lib/CodeGen/BackendUtil.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/test/CodeGen/memprof.cpp clang/test/Driver/fmemprof.cpp llvm/include/llvm/Support/PGOOptions.h llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h llvm/lib/LTO/LTOBackend.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassBuilderPipelines.cpp llvm/lib/Passes/PassRegistry.def llvm/lib/Support/PGOOptions.cpp llvm/lib/Transforms/Instrumentation/MemProfiler.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp llvm/test/Transforms/PGOProfile/memprof.ll llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll llvm/tools/opt/NewPMDriver.cpp Removed: diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index b0f22411e1ad20..fadfff48582eea 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -282,6 +282,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Name of the profile file to use as output for with -fmemory-profile. std::string MemoryProfileOutput; + /// Name of the profile file to use as input for -fmemory-profile-use. + std::string MemoryProfileUsePath; + /// Name of the profile file to use as input for -fprofile-instr-use std::string ProfileInstrumentUsePath; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a34eab4064997a..c5230d11baeddf 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1772,6 +1772,10 @@ defm memory_profile : OptInCC1FFlag<"memory-profile", "Enable", "Disable", " hea def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Enable heap memory profiling and dump results into ">; +def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, +Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, +HelpText<"Use memory profile for profile-guided memory optimization">, +MarshallingInfoString>; // Begin sanitizer flags. These should all be core options exposed in all driver // modes. diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 458756509b3a3d..06af08023d1be9 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -762,31 +762,37 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PGOOpt = PGOOptions( CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() : CodeGenOpts.InstrProfileOutput, -"", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction, -CodeGenOpts.DebugInfoForProfiling); +"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr, +PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse : PGOOptions::NoCSAction; -PGOOpt = -PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse, - CSAction,
[clang] f354e97 - [MemProf] Clean up MemProf instrumentation pass invocation
Author: Teresa Johnson Date: 2023-05-26T17:38:49-07:00 New Revision: f354e971b09c244147ff59eb65b34487755598c0 URL: https://github.com/llvm/llvm-project/commit/f354e971b09c244147ff59eb65b34487755598c0 DIFF: https://github.com/llvm/llvm-project/commit/f354e971b09c244147ff59eb65b34487755598c0.diff LOG: [MemProf] Clean up MemProf instrumentation pass invocation First, removes the invocation of the memprof instrumentation passes from the end of the module simplification pass builder, where it doesn't really belong. However, it turns out that this was never being invoked, as it is guarded by an internal option not used anywhere (even tests). These passes are actually added via clang under the -fmemory-profile option. Changed this to add via the EP callback interface, similar to the sanitizer passes. They are added to the EP for the end of the optimization pipeline, which is roughly where they were being added already (end of the pre-LTO link pipelines and non-LTO optimization pipeline). Ideally we should plumb the output file through to LLVM and set it up there, so I have added a TODO. Differential Revision: https://reviews.llvm.org/D151593 Added: Modified: clang/lib/CodeGen/BackendUtil.cpp llvm/lib/Passes/PassBuilderPipelines.cpp Removed: diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d62d00a156f1c..d4498ebaf8dea 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -992,6 +992,16 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(InstrProfiling(*Options, false)); }); +// TODO: Consider passing the MemoryProfileOutput to the pass builder via +// the PGOOptions, and set this up there. +if (!CodeGenOpts.MemoryProfileOutput.empty()) { + PB.registerOptimizerLastEPCallback( + [](ModulePassManager , OptimizationLevel Level) { +MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); +MPM.addPass(ModuleMemProfilerPass()); + }); +} + if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { @@ -999,11 +1009,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline( } else { MPM = PB.buildPerModuleDefaultPipeline(Level); } - -if (!CodeGenOpts.MemoryProfileOutput.empty()) { - MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); - MPM.addPass(ModuleMemProfilerPass()); -} } // Add a verifier pass if requested. We don't have to do this if the action diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 05bb4596b988c..34d3b9d7467e8 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -155,9 +155,6 @@ static cl::opt cl::Hidden, cl::desc("Enable inline deferral during PGO")); -static cl::opt EnableMemProfiler("enable-mem-prof", cl::Hidden, - cl::desc("Enable memory profiler")); - static cl::opt EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner")); @@ -1122,11 +1119,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(GlobalOptPass()); MPM.addPass(GlobalDCEPass()); - if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { -MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); -MPM.addPass(ModuleMemProfilerPass()); - } - return MPM; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 9e280c4 - [MemProf] Update hot/cold information after importing
Author: Teresa Johnson Date: 2023-05-10T14:58:35-07:00 New Revision: 9e280c47588bfaf008a5fb091cd47df92b9c4264 URL: https://github.com/llvm/llvm-project/commit/9e280c47588bfaf008a5fb091cd47df92b9c4264 DIFF: https://github.com/llvm/llvm-project/commit/9e280c47588bfaf008a5fb091cd47df92b9c4264.diff LOG: [MemProf] Update hot/cold information after importing The support added by D149215 to remove memprof metadata and attributes if we don't link with an allocator supporting hot/cold operator new interfaces did not update imported code. Move the update handling later in the ThinLTO backend to just after importing, and update the test to check this case. Differential Revision: https://reviews.llvm.org/D150295 Added: Modified: clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll llvm/lib/LTO/LTOBackend.cpp llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll Removed: diff --git a/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll index e213fbaf3fa14..08c1a2946971c 100644 --- a/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll +++ b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll @@ -22,7 +22,7 @@ ; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK-IR +; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck %s --check-prefix=CHECK-IR ; CHECK-IR: !memprof {{.*}} !callsite ; CHECK-IR: "memprof"="cold" @@ -42,7 +42,7 @@ ; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s \ +; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck %s \ ; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ ; RUN: --implicit-check-not "memprof"="cold" diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index a18963fcaf85d..a089cbe63963e 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -565,8 +565,6 @@ Error lto::thinBackend(const Config , unsigned Task, AddStreamFn AddStream, // the module, if applicable. Mod.setPartialSampleProfileRatio(CombinedIndex); - updateMemProfAttributes(Mod, CombinedIndex); - updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility()); if (Conf.CodeGenOnly) { @@ -653,6 +651,9 @@ Error lto::thinBackend(const Config , unsigned Task, AddStreamFn AddStream, if (Error Err = Importer.importFunctions(Mod, ImportList).takeError()) return Err; + // Do this after any importing so that imported code is updated. + updateMemProfAttributes(Mod, CombinedIndex); + if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll b/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll index 9e69377d14443..7a4d860d8d0d9 100644 --- a/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll +++ b/llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll @@ -3,37 +3,53 @@ ;; from being removed from the LTO backend, and vice versa without passing ;; -supports-hot-cold-new. +; RUN: split-file %s %t + ;; First check with -supports-hot-cold-new. -; RUN: opt -thinlto-bc %s >%t.o -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: opt -thinlto-bc %t/main.ll >%t/main.o +; RUN: opt -thinlto-bc %t/foo.ll >%t/foo.o +; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ ; RUN: -supports-hot-cold-new \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_Znam, \ +; RUN: -r=%t/main.o,main,plx \ +; RUN: -r=%t/main.o,bar,plx \ +; RUN: -r=%t/main.o,foo, \ +; RUN: -r=%t/main.o,_Znam, \ +; RUN: -r=%t/foo.o,foo,plx \ +; RUN: -r=%t/foo.o,_Znam, \ ; RUN: -memprof-dump-ccg \ ; RUN: -save-temps \ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP ; DUMP: Callsite Context Graph: -; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s --check-prefix=IR +; RUN: llvm-dis %t.out.1.3.import.bc -o - | FileCheck %s --check-prefix=IR +; IR: @main() +; IR: !memprof {{.*}} !callsite +; IR: @_Znam(i64 0) #[[ATTR:[0-9]+]] +; IR: @bar() ; IR: !memprof {{.*}} !callsite -; IR: "memprof"="cold" +; IR: @_Znam(i64 0) #[[ATTR:[0-9]+]] +; IR: attributes #[[ATTR]] = { "memprof"="cold" } ;; Next check without -supports-hot-cold-new, we should not perform ;; context disambiguation, and we should strip memprof metadata and ;; attributes before optimization. -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_Znam, \ +; RUN: llvm-lto2 run %t/main.o %t/foo.o
[clang] 1768898 - [MemProf] Control availability of hot/cold operator new from LTO link
Author: Teresa Johnson Date: 2023-05-08T08:02:21-07:00 New Revision: 176889868024d98db032842bc47b416997d9e349 URL: https://github.com/llvm/llvm-project/commit/176889868024d98db032842bc47b416997d9e349 DIFF: https://github.com/llvm/llvm-project/commit/176889868024d98db032842bc47b416997d9e349.diff LOG: [MemProf] Control availability of hot/cold operator new from LTO link Adds an LTO option to indicate that whether we are linking with an allocator that supports hot/cold operator new interfaces. If not, at the start of the LTO backends any existing memprof hot/cold attributes are removed from the IR, and we also remove memprof metadata so that post-LTO inlining doesn't add any new attributes. This is done via setting a new flag in the module summary index. It is important to communicate via the index to the LTO backends so that distributed ThinLTO handles this correctly, as they are invoked by separate clang processes and the combined index is how we communicate information from the LTO link. Specifically, for distributed ThinLTO the LTO related processes look like: ``` # Thin link: $ lld --thinlto-index-only obj1.o ... objN.o -llib ... # ThinLTO backends: $ clang -x ir obj1.o -fthinlto-index=obj1.o.thinlto.bc -c -O2 ... $ clang -x ir objN.o -fthinlto-index=objN.o.thinlto.bc -c -O2 ``` It is during the thin link (lld --thinlto-index-only) that we have visibility into linker dependences and want to be able to pass the new option via -Wl,-supports-hot-cold-new. This will be recorded in the summary indexes created for the distributed backend processes (*.thinlto.bc) and queried from there, so that we don't need to know during those individual clang backends what allocation library was linked. Since in-process ThinLTO and regular LTO also use a combined index, for consistency we query the flag out of the index in all LTO backends. Additionally, when the LTO option is disabled, exit early from the MemProfContextDisambiguation handling performed during LTO, as this is unnecessary. Depends on D149117 and D149192. Differential Revision: https://reviews.llvm.org/D149215 Added: clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll Modified: llvm/include/llvm/IR/ModuleSummaryIndex.h llvm/include/llvm/LTO/LTO.h llvm/lib/Bitcode/Reader/BitcodeReader.cpp llvm/lib/IR/ModuleSummaryIndex.cpp llvm/lib/LTO/LTO.cpp llvm/lib/LTO/LTOBackend.cpp llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp llvm/test/ThinLTO/X86/memprof-basic.ll llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll llvm/test/ThinLTO/X86/memprof-indirectcall.ll llvm/test/ThinLTO/X86/memprof-inlined.ll llvm/test/ThinLTO/X86/memprof-inlined2.ll llvm/test/Transforms/MemProfContextDisambiguation/basic.ll llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll Removed: diff --git a/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll new file mode 100644 index ..e213fbaf3fa1 --- /dev/null +++ b/clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll @@ -0,0 +1,70 @@ +; REQUIRES: x86-registered-target + +;; Test that passing -supports-hot-cold-new to the thin link prevents memprof +;; metadata and attributes from being removed from the distributed ThinLTO +;; backend, and vice versa without passing -supports-hot-cold-new. + +;; First check with -supports-hot-cold-new. +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -save-temps \ +; RUN: -supports-hot-cold-new \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -o %t.out + +;; Ensure that the index file reflects the -supports-hot-cold-new, as that is +;; how the ThinLTO backend behavior is controlled. +; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-ON +;; Flags are printed in decimal, but this corresponds to 0x161, and 0x100 is +;; the value indicating -supports-hot-cold-new was enabled. +; CHECK-INDEX-ON: flags: 353 + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj + +; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK-IR +; CHECK-IR:
[clang] 2cc0c0d - [MemProf] Recognize hot/cold operator new as replaceable allocations
Author: Teresa Johnson Date: 2023-05-01T13:37:40-07:00 New Revision: 2cc0c0de802178dc7e5408497e2ec53b6c9728fa URL: https://github.com/llvm/llvm-project/commit/2cc0c0de802178dc7e5408497e2ec53b6c9728fa DIFF: https://github.com/llvm/llvm-project/commit/2cc0c0de802178dc7e5408497e2ec53b6c9728fa.diff LOG: [MemProf] Recognize hot/cold operator new as replaceable allocations Follow up to LLVM-side change to allow conversion to hot/cold hinted operator new, not yet standard but supported by open source tcmalloc. The LLVM change in a35206d78280e0ebcf48cd21bc5fff5c3b9c73fa added the necessary support to recognize these as library functions, and we subsequently found that a clang-side change is needed to give them builtin/nobuiltin attributes as appropriate so they have consistent removeability. Based on discussion with Richard Smith, converted the parameter type to a reserved identifier (39f7b48671dae5fbe3afc49f33f50c2b6340bb89) and added support in this patch to recognize it in clang. Differential Revision: https://reviews.llvm.org/D149600 Added: clang/test/CodeGenCXX/new_hot_cold.cpp Modified: clang/lib/AST/Decl.cpp Removed: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index b728917fdda6d..d284df29f3b33 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3245,7 +3245,7 @@ bool FunctionDecl::isReplaceableGlobalAllocationFunction( return false; const auto *FPT = getType()->castAs(); - if (FPT->getNumParams() == 0 || FPT->getNumParams() > 3 || FPT->isVariadic()) + if (FPT->getNumParams() == 0 || FPT->getNumParams() > 4 || FPT->isVariadic()) return false; // If this is a single-parameter function, it must be a replaceable global @@ -3280,8 +3280,8 @@ bool FunctionDecl::isReplaceableGlobalAllocationFunction( *AlignmentParam = Params; } - // Finally, if this is not a sized delete, the final parameter can - // be a 'const std::nothrow_t&'. + // If this is not a sized delete, the next parameter can be a + // 'const std::nothrow_t&'. if (!IsSizedDelete && !Ty.isNull() && Ty->isReferenceType()) { Ty = Ty->getPointeeType(); if (Ty.getCVRQualifiers() != Qualifiers::Const) @@ -3293,6 +3293,19 @@ bool FunctionDecl::isReplaceableGlobalAllocationFunction( } } + // Finally, recognize the not yet standard versions of new that take a + // hot/cold allocation hint (__hot_cold_t). These are currently supported by + // tcmalloc (see + // https://github.com/google/tcmalloc/blob/220043886d4e2efff7a5702d5172cb8065253664/tcmalloc/malloc_extension.h#L53). + if (!IsSizedDelete && !Ty.isNull() && Ty->isEnumeralType()) { +QualType T = Ty; +while (const auto *TD = T->getAs()) + T = TD->getDecl()->getUnderlyingType(); +IdentifierInfo *II = T->getAs()->getDecl()->getIdentifier(); +if (II && II->isStr("__hot_cold_t")) + Consume(); + } + return Params == FPT->getNumParams(); } diff --git a/clang/test/CodeGenCXX/new_hot_cold.cpp b/clang/test/CodeGenCXX/new_hot_cold.cpp new file mode 100644 index 0..014e815201485 --- /dev/null +++ b/clang/test/CodeGenCXX/new_hot_cold.cpp @@ -0,0 +1,130 @@ +// Test to check that the appropriate attributes are added to the __hot_cold_t +// versions of operator new. + +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -faligned-allocation -emit-llvm -o - | FileCheck %s + +typedef __typeof__(sizeof(0)) size_t; + +namespace std { +struct nothrow_t {}; +enum class align_val_t : size_t; +} // namespace std +std::nothrow_t nothrow; + +typedef unsigned char uint8_t; + +// See the following link for how this type is declared in tcmalloc: +// https://github.com/google/tcmalloc/blob/220043886d4e2efff7a5702d5172cb8065253664/tcmalloc/malloc_extension.h#L53. +enum class __hot_cold_t : uint8_t; + +// Test handling of declaration that uses a type alias, ensuring that it still +// recognizes the expected __hot_cold_t type name. +namespace malloc_namespace { +using hot_cold_t = __hot_cold_t; +} + +void *operator new(size_t size, + malloc_namespace::hot_cold_t hot_cold) noexcept(false); +void *operator new[](size_t size, + malloc_namespace::hot_cold_t hot_cold) noexcept(false); +void *operator new(size_t size, const std::nothrow_t &, + malloc_namespace::hot_cold_t hot_cold) noexcept; +void *operator new[](size_t size, const std::nothrow_t &, + malloc_namespace::hot_cold_t hot_cold) noexcept; +void *operator new(size_t size, std::align_val_t alignment, + malloc_namespace::hot_cold_t hot_cold) noexcept(false); +void *operator new[](size_t size, std::align_val_t alignment, + malloc_namespace::hot_cold_t hot_cold) noexcept(false); +void *operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t &, +
[clang] e5b0276 - [ThinLTO] Reduce pipeline clang test to avoid churn from LLVM changes
Author: Teresa Johnson Date: 2023-04-25T13:33:09-07:00 New Revision: e5b0276dc882f7c5b2a349e2f02abf16b1d41322 URL: https://github.com/llvm/llvm-project/commit/e5b0276dc882f7c5b2a349e2f02abf16b1d41322 DIFF: https://github.com/llvm/llvm-project/commit/e5b0276dc882f7c5b2a349e2f02abf16b1d41322.diff LOG: [ThinLTO] Reduce pipeline clang test to avoid churn from LLVM changes This test was added in D72538, along with multiple LLVM pipeline tests, to ensure that distributed ThinLTO backends invoked via clang set up the expected ThinLTO optimization pipeline. However, this introduces churn to clang tests from LLVM pipeline changes (see recent comment in that patch). Since the full pipeline setup is tested by LLVM, I have changed this test to simply look for a single pass that is only invoked during LTO backends, to make sure that clang is provoking the an LTO backend pipeline setup. Added: Modified: clang/test/CodeGen/thinlto-distributed-newpm.ll Removed: diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll index c85ce0f6a27d..9967586fea9b 100644 --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -1,7 +1,12 @@ ; FIXME: This test should use CHECK-NEXT to keep up-to-date. ; REQUIRES: x86-registered-target -; Validate ThinLTO post link pipeline at O2 and O3 +;; Validate that we set up the ThinLTO post link pipeline at O2 and O3 +;; for a ThinLTO distributed backend invoked via clang. +;; Since LLVM tests already more thoroughly test this pipeline, and to +;; avoid making this clang test too sensitive to LLVM pipeline changes, +;; here we simply confirm that an LTO backend-specific pass is getting +;; invoked (WPD). ; RUN: opt -thinlto-bc -o %t.o %s @@ -17,94 +22,9 @@ ; RUN: %clang -target x86_64-grtev4-linux-gnu \ ; RUN: -O3 -Xclang -fdebug-pass-manager \ ; RUN: -c -fthinlto-index=%t.o.thinlto.bc \ -; RUN: -o %t.native.o -x ir %t.o 2>&1 | FileCheck -check-prefixes=CHECK-O,CHECK-O3 %s --dump-input=fail +; RUN: -o %t.native.o -x ir %t.o 2>&1 | FileCheck -check-prefixes=CHECK-O %s --dump-input=fail ; CHECK-O: Running pass: WholeProgramDevirtPass -; CHECK-O: Running pass: LowerTypeTestsPass -; CHECK-O: Running pass: PGOIndirectCallPromotion -; CHECK-O: Running pass: InferFunctionAttrsPass -; CHECK-O: Running pass: LowerExpectIntrinsicPass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: SROAPass on main -; CHECK-O: Running pass: EarlyCSEPass on main -; CHECK-O3: Running pass: CallSiteSplittingPass on main -; CHECK-O: Running pass: LowerTypeTestsPass -; CHECK-O: Running pass: IPSCCPPass -; CHECK-O: Running pass: CalledValuePropagationPass -; CHECK-O: Running pass: GlobalOptPass -; CHECK-O: Running pass: PromotePass -; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: InlinerPass on (main) -; CHECK-O: Running pass: PostOrderFunctionAttrsPass on (main) -; CHECK-O3: Running pass: ArgumentPromotionPass on (main) -; CHECK-O: Running pass: SROAPass on main -; CHECK-O: Running pass: EarlyCSEPass on main -; CHECK-O: Running pass: SpeculativeExecutionPass on main -; CHECK-O: Running pass: JumpThreadingPass on main -; CHECK-O: Running pass: CorrelatedValuePropagationPass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O3: Running pass: AggressiveInstCombinePass on main -; CHECK-O: Running pass: LibCallsShrinkWrapPass on main -; CHECK-O: Running pass: TailCallElimPass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: ReassociatePass on main -; CHECK-O: Running pass: LoopSimplifyPass on main -; CHECK-O: Running pass: LCSSAPass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O: Running pass: LoopSimplifyPass on main -; CHECK-O: Running pass: LCSSAPass on main -; CHECK-O: Running pass: SROAPass on main -; CHECK-O: Running pass: MergedLoadStoreMotionPass on main -; CHECK-O: Running pass: GVNPass on main -; CHECK-O: Running pass: SCCPPass on main -; CHECK-O: Running pass: BDCEPass on main -; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O: Running pass: JumpThreadingPass on main -; CHECK-O: Running pass: CorrelatedValuePropagationPass on main -; CHECK-O: Running pass: ADCEPass on main -; CHECK-O: Running pass: MemCpyOptPass on main -; CHECK-O: Running pass: DSEPass on main -; CHECK-O: Running pass: LoopSimplifyPass on main -; CHECK-O: Running pass: LCSSAPass on main -; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O: Running pass: DeadArgumentEliminationPass -; CHECK-O: Running pass: GlobalOptPass -; CHECK-O: Running pass: GlobalDCEPass -;
[clang] fb3b392 - [docs] Add more complete documentation for -f[no]split-lto-unit
Author: Teresa Johnson Date: 2023-03-13T11:33:46-07:00 New Revision: fb3b392264a099270b77ba7ebfb9d32817fc51d6 URL: https://github.com/llvm/llvm-project/commit/fb3b392264a099270b77ba7ebfb9d32817fc51d6 DIFF: https://github.com/llvm/llvm-project/commit/fb3b392264a099270b77ba7ebfb9d32817fc51d6.diff LOG: [docs] Add more complete documentation for -f[no]split-lto-unit Option was added in D53891, and only has basic documentation added later in 5168ddfac44206e94f7ddd484d1cf03dee320fa7. Add more extensive documentation with links to related docs. Differential Revision: https://reviews.llvm.org/D145951 Added: Modified: clang/docs/UsersManual.rst Removed: diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 0d5e960050c25..77b1c938c6ad4 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2013,6 +2013,24 @@ are listed below. devirtualization and virtual constant propagation, for classes with :doc:`hidden LTO visibility `. Requires ``-flto``. +.. option:: -f[no]split-lto-unit + + Controls splitting the :doc:`LTO unit ` into regular LTO and + :doc:`ThinLTO` portions, when compiling with -flto=thin. Defaults to false + unless ``-fsanitize=cfi`` or ``-fwhole-program-vtables`` are specified, in + which case it defaults to true. Splitting is required with ``fsanitize=cfi``, + and it is an error to disable via ``-fno-split-lto-unit``. Splitting is + optional with ``-fwhole-program-vtables``, however, it enables more + aggressive whole program vtable optimizations (specifically virtual constant + propagation). + + When enabled, vtable definitions and select virtual functions are placed + in the split regular LTO module, enabling more aggressive whole program + vtable optimizations required for CFI and virtual constant propagation. + However, this can increase the LTO link time and memory requirements over + pure ThinLTO, as all split regular LTO modules are merged and LTO linked + with regular LTO. + .. option:: -fforce-emit-vtables In order to improve devirtualization, forces emitting of vtables even in ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] ee73d24 - [MemProf] Collect access density statistics during profiling
Author: Teresa Johnson Date: 2023-01-12T17:53:23-08:00 New Revision: ee73d240ab1dc026f99e7e9062c921928d2b138c URL: https://github.com/llvm/llvm-project/commit/ee73d240ab1dc026f99e7e9062c921928d2b138c DIFF: https://github.com/llvm/llvm-project/commit/ee73d240ab1dc026f99e7e9062c921928d2b138c.diff LOG: [MemProf] Collect access density statistics during profiling Track min/max/avg access density (accesses per byte and accesses per byte per lifetime second) metrics directly during profiling. This allows more accurate use of these metrics in profile analysis and use, instead of trying to compute them from already aggregated data in the profile. This required regenerating some of the raw profile and executable inputs for a few tests. While here, make the llvm-profdata memprof tests more resilient to differences in things like memory mapping, timestamps and cpu ids to make future test updates easier. Differential Revision: https://reviews.llvm.org/D141558 Added: Modified: clang/test/CodeGen/Inputs/memprof.exe clang/test/CodeGen/Inputs/memprof.memprofraw compiler-rt/include/profile/MIBEntryDef.inc compiler-rt/include/profile/MemProfData.inc llvm/include/llvm/ProfileData/MIBEntryDef.inc llvm/include/llvm/ProfileData/MemProfData.inc llvm/test/Transforms/PGOProfile/Inputs/memprof.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw llvm/test/tools/llvm-profdata/memprof-basic.test llvm/test/tools/llvm-profdata/memprof-inline.test llvm/test/tools/llvm-profdata/memprof-multi.test Removed: diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe index 955c0d6b0e87a..c3b28a8ed9d3d 100755 Binary files a/clang/test/CodeGen/Inputs/memprof.exe and b/clang/test/CodeGen/Inputs/memprof.exe diff er diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw index 07a3310c122af..7ae89bff648af 100644 Binary files a/clang/test/CodeGen/Inputs/memprof.memprofraw and b/clang/test/CodeGen/Inputs/memprof.memprofraw diff er diff --git a/compiler-rt/include/profile/MIBEntryDef.inc b/compiler-rt/include/profile/MIBEntryDef.inc index f5c6f0e4924b2..794163ae10386 100644 --- a/compiler-rt/include/profile/MIBEntryDef.inc +++ b/compiler-rt/include/profile/MIBEntryDef.inc @@ -45,3 +45,9 @@ MIBEntryDef(NumLifetimeOverlaps = 16, NumLifetimeOverlaps, uint32_t) MIBEntryDef(NumSameAllocCpu = 17, NumSameAllocCpu, uint32_t) MIBEntryDef(NumSameDeallocCpu = 18, NumSameDeallocCpu, uint32_t) MIBEntryDef(DataTypeId = 19, DataTypeId, uint64_t) +MIBEntryDef(TotalAccessDensity = 20, TotalAccessDensity, uint64_t) +MIBEntryDef(MinAccessDensity = 21, MinAccessDensity, uint32_t) +MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t) +MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t) +MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t) +MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t) diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc index ca354ee332929..c533073da751f 100644 --- a/compiler-rt/include/profile/MemProfData.inc +++ b/compiler-rt/include/profile/MemProfData.inc @@ -32,7 +32,7 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 1ULL +#define MEMPROF_RAW_VERSION 2ULL namespace llvm { namespace memprof { @@ -127,6 +127,19 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs, TotalLifetime = DeallocTimestamp - AllocTimestamp; MinLifetime = TotalLifetime; MaxLifetime = TotalLifetime; + // Access density is accesses per byte. Multiply by 100 to include the + // fractional part. + TotalAccessDensity = AccessCount * 100 / Size; + MinAccessDensity = TotalAccessDensity; + MaxAccessDensity = TotalAccessDensity; + // Lifetime access density is the access density per second of lifetime. + // Multiply by 1000 to convert denominator lifetime to seconds (using a + // minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first + // to reduce truncations to 0. + TotalLifetimeAccessDensity = + TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1); + MinLifetimeAccessDensity = TotalLifetimeAccessDensity; + MaxLifetimeAccessDensity =
[clang] b1926f3 - Restore "[MemProf] Memprof profile matching and annotation"
Author: Teresa Johnson Date: 2022-09-23T11:38:47-07:00 New Revision: b1926f308f0939b365ee4940c7b1bd984b45e71a URL: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a DIFF: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a.diff LOG: Restore "[MemProf] Memprof profile matching and annotation" This reverts commit 794b7ea960ccc3222f2af582efadbc5e5c464292, and thus restores commit a212d8da94d08e229aa8d65283e4b116310bba10, and follow on fixes 0cd6763fa93159b84d70a5bb602c24996acaafaa, e9ff53d42feac7fc157718523275619a8106f2f3, and 37c6a25e9ab230e5e21fa34e246d9fec55275df0. Use a hash function (BLAKE3) instead of hash_combine/hash_code which are not guaranteed to be stable across executions. Additionally, it adds a "REQUIRES: x86_64-linux" to the tests that have raw profile inputs to avoid failures on big endian bots. Reviewers: snehasish, davidxl Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D128142 Added: clang/test/CodeGen/Inputs/memprof.exe clang/test/CodeGen/Inputs/memprof.memprofraw clang/test/CodeGen/memprof.cpp llvm/test/Transforms/PGOProfile/Inputs/memprof.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw llvm/test/Transforms/PGOProfile/memprof.ll llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll Modified: clang/lib/Frontend/CompilerInvocation.cpp llvm/include/llvm/Analysis/MemoryBuiltins.h llvm/include/llvm/ProfileData/InstrProfReader.h llvm/lib/Analysis/MemoryBuiltins.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp Removed: diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9f9241054b1ef..656e5950db988 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1306,7 +1306,10 @@ static void setPGOUseInstrumentor(CodeGenOptions , } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); - if (PGOReader->isIRLevelProfile()) { + // Currently memprof profiles are only added at the IR level. Mark the profile + // type as IR in that case as well and the subsequent matching needs to detect + // which is available (might be one or both). + if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe new file mode 100755 index 0..955c0d6b0e87a Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.exe diff er diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw new file mode 100644 index 0..07a3310c122af Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.memprofraw diff er diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp new file mode 100644 index 0..b246d1f086942 --- /dev/null +++ b/clang/test/CodeGen/memprof.cpp @@ -0,0 +1,38 @@ +// Test if memprof instrumentation and use pass are invoked. +// +// Instrumentation: +// Ensure Pass MemProfilerPass and ModuleMemProfilerPass are invoked. +// RUN: %clang_cc1 -O2 -fmemory-profile %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=INSTRUMENT +// INSTRUMENT: Running pass: MemProfilerPass on main +// INSTRUMENT: Running pass: ModuleMemProfilerPass on [module] + +// Avoid failures on big-endian systems that can't read the raw profile properly +// REQUIRES: x86_64-linux + +// TODO: Use text profile inputs once that is available for memprof. +// +// The following commands were used to compile the source to instrumented +// executables and collect raw binary format profiles: +// +// # Collect memory profile: +// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ +// -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ +// -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ +// memprof.cpp -o memprof.exe -fmemory-profile +// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw +// +// RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata + +// Profile use: +// Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile. +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE +// USE: Running pass: PGOInstrumentationUse on [module] + +char *foo() { + return new char[10]; +} +int main() { + char *a = foo(); + delete[] a; + return 0; +} diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h
[clang] 794b7ea - Revert "[MemProf] Memprof profile matching and annotation"
Author: Teresa Johnson Date: 2022-09-22T16:08:03-07:00 New Revision: 794b7ea960ccc3222f2af582efadbc5e5c464292 URL: https://github.com/llvm/llvm-project/commit/794b7ea960ccc3222f2af582efadbc5e5c464292 DIFF: https://github.com/llvm/llvm-project/commit/794b7ea960ccc3222f2af582efadbc5e5c464292.diff LOG: Revert "[MemProf] Memprof profile matching and annotation" This reverts commit a212d8da94d08e229aa8d65283e4b116310bba10, and follow on fixes 0cd6763fa93159b84d70a5bb602c24996acaafaa, e9ff53d42feac7fc157718523275619a8106f2f3, and 37c6a25e9ab230e5e21fa34e246d9fec55275df0. After re-reading the documentation for hash_combine, I don't think this is the appropriate hash function to use for computing the hash to use as a stack id in the metadata, since it is not guaranteed to produce stable values across executions. I have not hit this problem, but plan to switch to using an MD5 hash. I am hitting an issue with one of the bots (https://lab.llvm.org/buildbot/#/builders/171/builds/20732) where the values produced are only the lower 32 bits of the expected hash values, however, which I assume is related to the implementation of hash_combine and hash_code. I believe I fixed all of the other bot failures with the follow on fixes, which I'll merge into the new version before reapplying. Added: Modified: clang/lib/Frontend/CompilerInvocation.cpp llvm/include/llvm/Analysis/MemoryBuiltins.h llvm/include/llvm/ProfileData/InstrProfReader.h llvm/lib/Analysis/MemoryBuiltins.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp Removed: clang/test/CodeGen/Inputs/memprof.exe clang/test/CodeGen/Inputs/memprof.memprofraw clang/test/CodeGen/memprof.cpp llvm/test/Transforms/PGOProfile/Inputs/memprof.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw llvm/test/Transforms/PGOProfile/memprof.ll llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 656e5950db98..9f9241054b1e 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1306,10 +1306,7 @@ static void setPGOUseInstrumentor(CodeGenOptions , } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); - // Currently memprof profiles are only added at the IR level. Mark the profile - // type as IR in that case as well and the subsequent matching needs to detect - // which is available (might be one or both). - if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { + if (PGOReader->isIRLevelProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe deleted file mode 100755 index 955c0d6b0e87.. Binary files a/clang/test/CodeGen/Inputs/memprof.exe and /dev/null diff er diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw deleted file mode 100644 index 07a3310c122a.. Binary files a/clang/test/CodeGen/Inputs/memprof.memprofraw and /dev/null diff er diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp deleted file mode 100644 index 2ecb413f0d1a.. --- a/clang/test/CodeGen/memprof.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Test if memprof instrumentation and use pass are invoked. -// -// Instrumentation: -// Ensure Pass MemProfilerPass and ModuleMemProfilerPass are invoked. -// RUN: %clang_cc1 -O2 -fmemory-profile %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=INSTRUMENT -// INSTRUMENT: Running pass: MemProfilerPass on main -// INSTRUMENT: Running pass: ModuleMemProfilerPass on [module] - -// TODO: Use text profile inputs once that is available for memprof. -// -// The following commands were used to compile the source to instrumented -// executables and collect raw binary format profiles: -// -// # Collect memory profile: -// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \ -// -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \ -// -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \ -// memprof.cpp -o memprof.exe -fmemory-profile -// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw -// -// RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata - -// Profile use: -// Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile. -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE -// USE: Running pass: PGOInstrumentationUse on [module] -
[clang] a212d8d - [MemProf] Memprof profile matching and annotation
Author: Teresa Johnson Date: 2022-09-22T12:48:31-07:00 New Revision: a212d8da94d08e229aa8d65283e4b116310bba10 URL: https://github.com/llvm/llvm-project/commit/a212d8da94d08e229aa8d65283e4b116310bba10 DIFF: https://github.com/llvm/llvm-project/commit/a212d8da94d08e229aa8d65283e4b116310bba10.diff LOG: [MemProf] Memprof profile matching and annotation Profile matching and IR annotation for memprof profiles. See also related RFCs: RFC: Sanitizer-based Heap Profiler [1] RFC: A binary serialization format for MemProf [2] RFC: IR metadata format for MemProf [3]* * Note that the IR metadata format has changed from the RFC during implementation, as described in the preceeding patch adding the basic metadata and verification support. The matching is performed during the normal PGO annotation phase, to ensure that the inlines applied in the IR at that point are a subset of the inlines in the profiled binary and thus reflected in the profile's call stacks. This is important because the call frames are associated with functions in the profile based on the inlining in the symbolized call stacks, and this simplifies locating the subset of profile data relevant for matching onto each function's IR. The PGOInstrumentationUse pass is enhanced to perform matching for whatever combination of memprof and regular PGO profile data exists in the profile. Using the utilities introduced in D128854: The memprof profile data for each context is converted to "cold" or "notcold" based on parameterized thresholds for size, access count, and lifetime. The memprof allocation contexts are trimmed to the minimal amount of context required to uniquely identify whether the context is cold or not cold. For allocations where all profiled contexts have the same allocation type, no memprof metadata is attached and instead the allocation call is directly annotated with an attribute specifying the alloction type. This is the same attributed that will be applied to allocation calls once cloned for different contexts, and later used during LibCall simplification to emit allocation hints [4]. Depends on D128141 and D128854. [1] https://lists.llvm.org/pipermail/llvm-dev/2020-June/142744.html [2] https://lists.llvm.org/pipermail/llvm-dev/2021-September/153007.html [3] https://discourse.llvm.org/t/rfc-ir-metadata-format-for-memprof/59165 [4] https://github.com/google/tcmalloc/commit/ab87cf382dc56784f783f3aaa43d6d0465d5f385 Differential Revision: https://reviews.llvm.org/D128142 Added: clang/test/CodeGen/Inputs/memprof.exe clang/test/CodeGen/Inputs/memprof.memprofraw clang/test/CodeGen/memprof.cpp llvm/test/Transforms/PGOProfile/Inputs/memprof.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw llvm/test/Transforms/PGOProfile/memprof.ll llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll Modified: clang/lib/Frontend/CompilerInvocation.cpp llvm/include/llvm/Analysis/MemoryBuiltins.h llvm/include/llvm/ProfileData/InstrProfReader.h llvm/lib/Analysis/MemoryBuiltins.cpp llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp Removed: diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9f9241054b1ef..656e5950db988 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1306,7 +1306,10 @@ static void setPGOUseInstrumentor(CodeGenOptions , } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); - if (PGOReader->isIRLevelProfile()) { + // Currently memprof profiles are only added at the IR level. Mark the profile + // type as IR in that case as well and the subsequent matching needs to detect + // which is available (might be one or both). + if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe new file mode 100755 index 0..955c0d6b0e87a Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.exe diff er diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw new file mode 100644 index 0..07a3310c122af Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.memprofraw diff er diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp new file mode 100644 index 0..2ecb413f0d1a2 --- /dev/null +++ b/clang/test/CodeGen/memprof.cpp @@ -0,0 +1,35 @@ +// Test if memprof instrumentation and use pass are invoked. +// +// Instrumentation: +// Ensure Pass MemProfilerPass and ModuleMemProfilerPass are invoked. +// RUN: %clang_cc1 -O2 -fmemory-profile %s -fdebug-pass-manager
[clang] b4ac849 - [clang][NFC] Extract EmitAssemblyHelper::shouldEmitRegularLTOSummary
Author: Pavel Samolysov Date: 2022-04-07T10:38:46-07:00 New Revision: b4ac84901e9b88429e5e51dc0b4a17b8d3e37708 URL: https://github.com/llvm/llvm-project/commit/b4ac84901e9b88429e5e51dc0b4a17b8d3e37708 DIFF: https://github.com/llvm/llvm-project/commit/b4ac84901e9b88429e5e51dc0b4a17b8d3e37708.diff LOG: [clang][NFC] Extract EmitAssemblyHelper::shouldEmitRegularLTOSummary The code to check if the regular LTO summary should be emitted and to add the corresponding module flags was duplicated in the 'EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager' and 'EmitAssemblyHelper::RunOptimizationPipeline' methods. In order to eliminate these code duplications, the 'EmitAssemblyHelper::shouldEmitRegularLTOSummary' method has been extracted. The method returns a bool value, the value is 'true' if the module summary should be emitted. The patch keeps the setting of the module flags inline. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D123026 Added: Modified: clang/lib/CodeGen/BackendUtil.cpp Removed: diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 36776d39b952a..c09afefb3cc9a 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -164,6 +164,16 @@ class EmitAssemblyHelper { std::unique_ptr , std::unique_ptr ); + /// Check whether we should emit a module summary for regular LTO. + /// The module summary should be emitted by default for regular LTO + /// except for ld64 targets. + /// + /// \return True if the module summary should be emitted. + bool shouldEmitRegularLTOSummary() const { +return CodeGenOpts.PrepareForLTO && !CodeGenOpts.DisableLLVMPasses && + TargetTriple.getVendor() != llvm::Triple::Apple; + } + public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const HeaderSearchOptions , @@ -1054,9 +1064,7 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( } else { // Emit a module summary by default for Regular LTO except for ld64 // targets - bool EmitLTOSummary = - (CodeGenOpts.PrepareForLTO && !CodeGenOpts.DisableLLVMPasses && - TargetTriple.getVendor() != llvm::Triple::Apple); + bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); @@ -1064,7 +1072,6 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", uint32_t(1)); } - PerModulePasses.add(createBitcodeWriterPass( *OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); } @@ -1470,9 +1477,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( } else { // Emit a module summary by default for Regular LTO except for ld64 // targets - bool EmitLTOSummary = - (CodeGenOpts.PrepareForLTO && !CodeGenOpts.DisableLLVMPasses && - TargetTriple.getVendor() != llvm::Triple::Apple); + bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] b55a964 - Second attempt to fix Windows failures from test changes
Author: Teresa Johnson Date: 2021-09-29T19:24:35-07:00 New Revision: b55a964197bdc651533377bbd0b46fa58edf9196 URL: https://github.com/llvm/llvm-project/commit/b55a964197bdc651533377bbd0b46fa58edf9196 DIFF: https://github.com/llvm/llvm-project/commit/b55a964197bdc651533377bbd0b46fa58edf9196.diff LOG: Second attempt to fix Windows failures from test changes Try to address Windows flakes from d87bdc272ba47b7d9109ff5c7191454ab2ae6fcb by adding "|| true" as suggested in D110276 so the whole test doesn't fail when Windows thinks it can't remove the binary. Added: Modified: clang/test/Driver/clang_f_opts.c lld/test/COFF/pdb-relative-source-lines.test Removed: diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 3325dc44a8942..4b0159b80f739 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -566,7 +566,7 @@ // RUN: "%t.r/with spaces/clang" -### -S -target x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ESCAPED %s // CHECK-RECORD-GCC-SWITCHES-ESCAPED: "-record-command-line" "{{.+}}with\\ spaces{{.+}}" // Clean up copy of large binary copied into temp directory to avoid bloat. -// RUN: rm -f "%t.r/with spaces/clang" +// RUN: rm -f "%t.r/with spaces/clang" || true // RUN: %clang -### -S -ftrivial-auto-var-init=uninitialized %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-UNINIT %s // RUN: %clang -### -S -ftrivial-auto-var-init=pattern %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-PATTERN %s diff --git a/lld/test/COFF/pdb-relative-source-lines.test b/lld/test/COFF/pdb-relative-source-lines.test index 728667bc43b65..b72c5036a9159 100644 --- a/lld/test/COFF/pdb-relative-source-lines.test +++ b/lld/test/COFF/pdb-relative-source-lines.test @@ -42,7 +42,7 @@ RUN: ./lld-link -debug -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_l RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=lines,fc %t/out.pdb | FileCheck --check-prefix=ABSOLUTE %s Clean up copy of large binary copied into temp directory to avoid bloat. -RUN: rm -f ./lld-link +RUN: rm -f ./lld-link || true CHECK-LABEL: - Module: 'c:\src\pdb_lines_1_relative.obj' CHECK-NEXT: ObjFile: 'c:\src\pdb_lines_1_relative.obj' ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 2f1b99c - Use rm -f to fix Windows failures from test changes
Author: Teresa Johnson Date: 2021-09-29T08:01:22-07:00 New Revision: 2f1b99ca67da18d858a4b070716790a8f53891d6 URL: https://github.com/llvm/llvm-project/commit/2f1b99ca67da18d858a4b070716790a8f53891d6 DIFF: https://github.com/llvm/llvm-project/commit/2f1b99ca67da18d858a4b070716790a8f53891d6.diff LOG: Use rm -f to fix Windows failures from test changes Try to address Windows flakes from d87bdc272ba47b7d9109ff5c7191454ab2ae6fcb by using 'rm -f' instead of just 'rm' as discussed in D110276. For example: http://45.33.8.238/win/46115/step_7.txt Added: Modified: clang/test/Driver/clang_f_opts.c lld/test/COFF/pdb-relative-source-lines.test Removed: diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 2405fc9f0327e..3325dc44a8942 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -566,7 +566,7 @@ // RUN: "%t.r/with spaces/clang" -### -S -target x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ESCAPED %s // CHECK-RECORD-GCC-SWITCHES-ESCAPED: "-record-command-line" "{{.+}}with\\ spaces{{.+}}" // Clean up copy of large binary copied into temp directory to avoid bloat. -// RUN: rm "%t.r/with spaces/clang" +// RUN: rm -f "%t.r/with spaces/clang" // RUN: %clang -### -S -ftrivial-auto-var-init=uninitialized %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-UNINIT %s // RUN: %clang -### -S -ftrivial-auto-var-init=pattern %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-PATTERN %s diff --git a/lld/test/COFF/pdb-relative-source-lines.test b/lld/test/COFF/pdb-relative-source-lines.test index 2a131dd39f561..728667bc43b65 100644 --- a/lld/test/COFF/pdb-relative-source-lines.test +++ b/lld/test/COFF/pdb-relative-source-lines.test @@ -42,7 +42,7 @@ RUN: ./lld-link -debug -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_l RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=lines,fc %t/out.pdb | FileCheck --check-prefix=ABSOLUTE %s Clean up copy of large binary copied into temp directory to avoid bloat. -RUN: rm ./lld-link +RUN: rm -f ./lld-link CHECK-LABEL: - Module: 'c:\src\pdb_lines_1_relative.obj' CHECK-NEXT: ObjFile: 'c:\src\pdb_lines_1_relative.obj' ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] d87bdc2 - Clean up large copies of binaries copied into temp directories in tests
Author: Teresa Johnson Date: 2021-09-28T17:04:09-07:00 New Revision: d87bdc272ba47b7d9109ff5c7191454ab2ae6fcb URL: https://github.com/llvm/llvm-project/commit/d87bdc272ba47b7d9109ff5c7191454ab2ae6fcb DIFF: https://github.com/llvm/llvm-project/commit/d87bdc272ba47b7d9109ff5c7191454ab2ae6fcb.diff LOG: Clean up large copies of binaries copied into temp directories in tests In looking at the disk space used by a ninja check-all, I found that a few of the largest files were copies of clang and lld made into temp directories by a couple of tests. These tests were added in D53021 and D74811. Clean up these copies after usage. Differential Revision: https://reviews.llvm.org/D110276 Added: Modified: clang/test/Driver/clang_f_opts.c lld/test/COFF/pdb-relative-source-lines.test Removed: diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 487ae08250396..2405fc9f0327e 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -565,6 +565,8 @@ // RUN: cp %clang "%t.r/with spaces/clang" // RUN: "%t.r/with spaces/clang" -### -S -target x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ESCAPED %s // CHECK-RECORD-GCC-SWITCHES-ESCAPED: "-record-command-line" "{{.+}}with\\ spaces{{.+}}" +// Clean up copy of large binary copied into temp directory to avoid bloat. +// RUN: rm "%t.r/with spaces/clang" // RUN: %clang -### -S -ftrivial-auto-var-init=uninitialized %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-UNINIT %s // RUN: %clang -### -S -ftrivial-auto-var-init=pattern %s 2>&1 | FileCheck -check-prefix=CHECK-TRIVIAL-PATTERN %s diff --git a/lld/test/COFF/pdb-relative-source-lines.test b/lld/test/COFF/pdb-relative-source-lines.test index cc241c7ee4d7c..2a131dd39f561 100644 --- a/lld/test/COFF/pdb-relative-source-lines.test +++ b/lld/test/COFF/pdb-relative-source-lines.test @@ -41,6 +41,9 @@ Also check without -pdbsourcepath RUN: ./lld-link -debug -entry:main -nodefaultlib -out:out.exe -pdb:out.pdb pdb_lines_1_relative.obj pdb_lines_2_relative.obj RUN: llvm-pdbutil pdb2yaml -modules -module-files -module-syms -subsections=lines,fc %t/out.pdb | FileCheck --check-prefix=ABSOLUTE %s +Clean up copy of large binary copied into temp directory to avoid bloat. +RUN: rm ./lld-link + CHECK-LABEL: - Module: 'c:\src\pdb_lines_1_relative.obj' CHECK-NEXT: ObjFile: 'c:\src\pdb_lines_1_relative.obj' CHECK: SourceFiles: ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] d0ee8b6 - [LTO] Fix -fwhole-program-vtables handling after HIP ThinLTO patch
Author: Teresa Johnson Date: 2021-06-03T14:25:03-07:00 New Revision: d0ee8b64ecf359737ce550d8f47f465ab6657be7 URL: https://github.com/llvm/llvm-project/commit/d0ee8b64ecf359737ce550d8f47f465ab6657be7 DIFF: https://github.com/llvm/llvm-project/commit/d0ee8b64ecf359737ce550d8f47f465ab6657be7.diff LOG: [LTO] Fix -fwhole-program-vtables handling after HIP ThinLTO patch A recent change (D99683) to support ThinLTO for HIP caused a regression when compiling cuda code with -flto=thin -fwhole-program-vtables. Specifically, we now get an error: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' This error is coming from the device offload cc1 action being set up for the cuda compile, for which -flto=thin doesn't apply and gets dropped. This is a regression, but points to a potential issue that was silently occurring before the patch, details below. Before D99683, the check for fwhole-program-vtables in the driver looked like: if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; CmdArgs.push_back("-fwhole-program-vtables"); } And D.isUsingLTO() returned true since we have -flto=thin. However, because the cuda cc1 compile is doing device offloading, which didn't support any LTO, there was other code that suppressed -flto* options from being passed to the cc1 invocation. So the cc1 invocation silently had -fwhole-program-vtables without any -flto*. This seems potentially problematic, since if we had any virtual calls we would get type test assume sequences without the corresponding LTO pass that handles them. However, with the patch, which adds support for device offloading LTO option -foffload-lto=thin, the code has changed so that we set a bool IsUsingLTO based on either -flto* or -foffload-lto*, depending on whether this is the device offloading action. For the device offload action in our compile, since we don't have -foffload-lto, IsUsingLTO is false, and the check for LTO with -fwhole-program-vtables now fails. What we should do is only pass through -fwhole-program-vtables to the cc1 invocation that has LTO enabled (either the device offload action with -foffload-lto, or the non-device offload action with -flto), and otherwise drop the -fwhole-program-vtables for the non-LTO action. Then we should error only if we have -fwhole-program-vtables without any -f*lto* options. Differential Revision: https://reviews.llvm.org/D103579 Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/cuda-options.cu clang/test/Driver/hip-options.hip Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index dea4ade683ef8..ee40df35b8507 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6647,11 +6647,17 @@ void Clang::ConstructJob(Compilation , const JobAction , } if (WholeProgramVTables) { -if (!IsUsingLTO) +// Propagate -fwhole-program-vtables if this is an LTO compile. +if (IsUsingLTO) + CmdArgs.push_back("-fwhole-program-vtables"); +// Check if we passed LTO options but they were suppressed because this is a +// device offloading action, or we passed device offload LTO options which +// were suppressed because this is not the device offload action. +// Otherwise, issue an error. +else if (!D.isUsingLTO(!IsDeviceOffloadAction)) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; -CmdArgs.push_back("-fwhole-program-vtables"); } bool DefaultsSplitLTOUnit = diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index 175e4b877ce94..5b67d7e4d04ff 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -183,6 +183,12 @@ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s +// Verify -flto=thin -fwhole-program-vtables handling. This should result in +// both options being passed to the host compilation, with neither passed to +// the device compilation. +// RUN: %clang -### -target x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s +// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" @@ -206,8 +212,10 @@ // Match the job that produces PTX assembly. // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// THINLTOWPD-NOT: "-flto=thin" // DEVICE-SAME: "-fcuda-is-device" // DEVICE-SM30-SAME:
[clang] 0923a60 - [clang] Emit type metadata on available_externally vtables for WPD
Author: Teresa Johnson Date: 2021-02-19T12:42:34-08:00 New Revision: 0923a60ea70f884d2f170f65d0faa494a25af231 URL: https://github.com/llvm/llvm-project/commit/0923a60ea70f884d2f170f65d0faa494a25af231 DIFF: https://github.com/llvm/llvm-project/commit/0923a60ea70f884d2f170f65d0faa494a25af231.diff LOG: [clang] Emit type metadata on available_externally vtables for WPD When WPD is enabled, via WholeProgramVTables, emit type metadata for available_externally vtables. Additionally, add the vtables to the llvm.compiler.used global so that they are not prematurely eliminated (before *LTO analysis). This is needed to avoid devirtualizing calls to a function overriding a class defined in a header file but with a strong definition in a shared library. Without type metadata on the available_externally vtables from the header, the WPD analysis never sees what a derived class is overriding. Even if the available_externally base class functions are pure virtual, because shared library definitions are already treated conservatively (committed patches D91583, D96721, and D96722) we will not devirtualize, which would be unsafe since the library might contain overrides that aren't visible to the LTO unit. An example is std::error_category, which is overridden in LLVM and causing failures after a self build with WPD enabled, because libstdc++ contains hidden overrides of the virtual base class methods. Differential Revision: https://reviews.llvm.org/D96919 Added: Modified: clang/lib/CodeGen/ItaniumCXXABI.cpp clang/test/CodeGenCXX/type-metadata.cpp Removed: diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 0350aebfdf7f..2d202414e98e 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1767,8 +1767,22 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables , DC->getParent()->isTranslationUnit()) EmitFundamentalRTTIDescriptors(RD); - if (!VTable->isDeclarationForLinker()) + // Always emit type metadata on non-available_externally definitions, and on + // available_externally definitions if we are performing whole program + // devirtualization. For WPD we need the type metadata on all vtable + // definitions to ensure we associate derived classes with base classes + // defined in headers but with a strong definition only in a shared library. + if (!VTable->isDeclarationForLinker() || + CGM.getCodeGenOpts().WholeProgramVTables) { CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); +// For available_externally definitions, add the vtable to +// @llvm.compiler.used so that it isn't deleted before whole program +// analysis. +if (VTable->isDeclarationForLinker()) { + assert(CGM.getCodeGenOpts().WholeProgramVTables); + CGM.addCompilerUsedGlobal(VTable); +} + } if (VTContext.isRelativeLayout() && !VTable->isDSOLocal()) CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName()); diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 9bd91fe5d843..e4f7e0f6228e 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -7,6 +7,7 @@ // Tests for the whole-program-vtables feature: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=TT-ITANIUM %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM %s +// RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s // Tests for cfi + whole-program-vtables: @@ -79,6 +80,13 @@ // ITANIUM-DIAG-SAME: !type [[ALL16]] // ITANIUM-SAME: !type [[FAF16:![0-9]+]] +// ITANIUM: @_ZTVN5test31EE = external unnamed_addr constant +// ITANIUM-DEFAULTVIS: @_ZTVN5test31EE = external unnamed_addr constant +// ITANIUM-OPT: @_ZTVN5test31EE = available_externally unnamed_addr constant {{[^!]*}}, +// ITANIUM-OPT-SAME: !type [[E16:![0-9]+]], +// ITANIUM-OPT-SAME: !type [[EF16:![0-9]+]] +// ITANIUM-OPT: @llvm.compiler.used = appending global [1 x i8*] [i8* bitcast ({ [3 x i8*] }* @_ZTVN5test31EE to i8*)] + // MS: comdat($"??_7A@@6B@"), !type [[A8:![0-9]+]] // MS: comdat($"??_7B@@6B0@@"), !type [[B8:![0-9]+]] // MS: comdat($"??_7B@@6BA@@@"), !type [[A8]] @@ -253,6 +261,20 @@ void f(D *d) { } +namespace test3 { +// All virtual functions are outline, so
[clang] 0768b05 - Avoid redundant work when computing vtable vcall visibility
Author: Teresa Johnson Date: 2020-11-24T12:06:24-08:00 New Revision: 0768b0576a938b6a4832884384fcb02cd2f74e09 URL: https://github.com/llvm/llvm-project/commit/0768b0576a938b6a4832884384fcb02cd2f74e09 DIFF: https://github.com/llvm/llvm-project/commit/0768b0576a938b6a4832884384fcb02cd2f74e09.diff LOG: Avoid redundant work when computing vtable vcall visibility Add a Visited set to avoid repeatedly processing the same base classes in complex class hierarchies. This cut down the compile time of one source file from >12min to ~1min. Differential Revision: https://reviews.llvm.org/D91676 Added: Modified: clang/lib/CodeGen/CGVTables.cpp clang/lib/CodeGen/CodeGenModule.h clang/lib/CodeGen/MicrosoftCXXABI.cpp Removed: diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 65b3b0c5f53d..75afc860cc47 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1294,8 +1294,16 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return !HasLTOVisibilityPublicStd(RD); } -llvm::GlobalObject::VCallVisibility -CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { +llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( +const CXXRecordDecl *RD, llvm::DenseSet ) { + // If we have already visited this RD (which means this is a recursive call + // since the initial call should have an empty Visited set), return the max + // visibility. The recursive calls below compute the min between the result + // of the recursive call and the current TypeVis, so returning the max here + // ensures that it will have no effect on the current TypeVis. + if (!Visited.insert(RD).second) +return llvm::GlobalObject::VCallVisibilityTranslationUnit; + LinkageInfo LV = RD->getLinkageAndVisibility(); llvm::GlobalObject::VCallVisibility TypeVis; if (!isExternallyVisible(LV.getLinkage())) @@ -1307,13 +1315,15 @@ CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { for (auto B : RD->bases()) if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) - TypeVis = std::min(TypeVis, - GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + TypeVis = std::min( + TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl(), Visited)); for (auto B : RD->vbases()) if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) - TypeVis = std::min(TypeVis, - GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + TypeVis = std::min( + TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl(), Visited)); return TypeVis; } @@ -1382,7 +1392,9 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, if (getCodeGenOpts().VirtualFunctionElimination || getCodeGenOpts().WholeProgramVTables) { -llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); +llvm::DenseSet Visited; +llvm::GlobalObject::VCallVisibility TypeVis = +GetVCallVisibilityLevel(RD, Visited); if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) VTable->setVCallVisibilityMetadata(TypeVis); } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 7d812b6658dc..c59570598b0d 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1333,8 +1333,11 @@ class CodeGenModule : public CodeGenTypeCache { /// a virtual function call could be made which ends up being dispatched to a /// member function of this class. This scope can be wider than the visibility /// of the class itself when the class has a more-visible dynamic base class. + /// The client should pass in an empty Visited set, which is used to prevent + /// redundant recursive processing. llvm::GlobalObject::VCallVisibility - GetVCallVisibilityLevel(const CXXRecordDecl *RD); + GetVCallVisibilityLevel(const CXXRecordDecl *RD, + llvm::DenseSet ); /// Emit type metadata for the given vtable using the given layout. void EmitVTableTypeMetadata(const CXXRecordDecl *RD, diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 8046b7afce57..c16c72dc93d5 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1649,8 +1649,9 @@ void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo , // TODO: Should VirtualFunctionElimination also be supported here? // See similar handling in CodeGenModule::EmitVTableTypeMetadata. if (CGM.getCodeGenOpts().WholeProgramVTables) { +llvm::DenseSet Visited; llvm::GlobalObject::VCallVisibility TypeVis = -CGM.GetVCallVisibilityLevel(RD); +CGM.GetVCallVisibilityLevel(RD, Visited); if (TypeVis !=
[clang] 6e4c1cf - [ThinLTO/WPD] Enable -wholeprogramdevirt-skip in ThinLTO backends
Author: Teresa Johnson Date: 2020-11-24T09:35:07-08:00 New Revision: 6e4c1cf2938842ceefc2712f0007843369dd16ca URL: https://github.com/llvm/llvm-project/commit/6e4c1cf2938842ceefc2712f0007843369dd16ca DIFF: https://github.com/llvm/llvm-project/commit/6e4c1cf2938842ceefc2712f0007843369dd16ca.diff LOG: [ThinLTO/WPD] Enable -wholeprogramdevirt-skip in ThinLTO backends Previously this option could be used to skip devirtualizations of the given functions in regular LTO and in the ThinLTO indexing step. This change allows them to be skipped in the backend as well, which is useful when debugging WPD in a distributed ThinLTO backend. Differential Revision: https://reviews.llvm.org/D91812 Added: Modified: clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp Removed: diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 5c753ba6f93c..0a330a53e948 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -40,8 +40,16 @@ ; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi") ; guid = 7004155349499253778 ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ -; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc -O2 \ -; RUN: -emit-llvm -o - -x ir %t.o | FileCheck %s --check-prefixes=CHECK-IR +; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc -O2 -Rpass=wholeprogramdevirt \ +; RUN: -emit-llvm -o - -x ir %t.o 2>&1 | FileCheck %s --check-prefixes=CHECK-IR --check-prefixes=REMARKS + +; Check that the devirtualization is suppressed via -wholeprogramdevirt-skip +; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu -mllvm -wholeprogramdevirt-skip=_ZN1A1nEi \ +; RUN: -emit-obj -fthinlto-index=%t.o.thinlto.bc -O2 -Rpass=wholeprogramdevirt \ +; RUN: -emit-llvm -o - -x ir %t.o 2>&1 | FileCheck %s --check-prefixes=SKIP-IR --check-prefixes=SKIP-REMARKS + +; REMARKS: single-impl: devirtualized a call to _ZN1A1nEi +; SKIP-REMARKS-NOT: single-impl ; Check that backend does not fail generating native code. ; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \ @@ -78,6 +86,7 @@ cont: ; Check that the call was devirtualized. ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; SKIP-IR-NOT: %call = tail call i32 @_ZN1A1nEi %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) %vtable16 = load i8*, i8** %0 %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A") diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index e97f1acbb396..5350d85e11f3 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1030,6 +1030,10 @@ bool DevirtIndex::tryFindVirtualCallTargets( void DevirtModule::applySingleImplDevirt(VTableSlotInfo , Constant *TheFn, bool ) { + // Don't devirtualize function if we're told to skip it + // in -wholeprogramdevirt-skip. + if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName())) +return; auto Apply = [&](CallSiteInfo ) { for (auto & : CSInfo.CallSites) { if (RemarksEnabled) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 95824be - [MemProf] Fix test failure on windows
Author: Teresa Johnson Date: 2020-11-01T19:06:50-08:00 New Revision: 95824be18fcd70a90787fecd1e51ca0c67d8bd20 URL: https://github.com/llvm/llvm-project/commit/95824be18fcd70a90787fecd1e51ca0c67d8bd20 DIFF: https://github.com/llvm/llvm-project/commit/95824be18fcd70a90787fecd1e51ca0c67d8bd20.diff LOG: [MemProf] Fix test failure on windows Fix failure in new test from 0949f96dc6521be80ebb8ebc1e1c506165c22aac: Don't match exact file path separator. Should fix: http://lab.llvm.org:8011/#/builders/119/builds/437/steps/9/logs/FAIL__Clang__memory-profile-filename_c Added: Modified: clang/test/CodeGen/memory-profile-filename.c Removed: diff --git a/clang/test/CodeGen/memory-profile-filename.c b/clang/test/CodeGen/memory-profile-filename.c index 0041aca1cd7a..89ce90991d3c 100644 --- a/clang/test/CodeGen/memory-profile-filename.c +++ b/clang/test/CodeGen/memory-profile-filename.c @@ -9,4 +9,4 @@ int main(void) { // NONE-NOT: MemProfProfileFilename // DEFAULTNAME: !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"} -// CUSTOMNAME: !{i32 1, !"MemProfProfileFilename", !"/tmp/memprof.profraw"} +// CUSTOMNAME: !{i32 1, !"MemProfProfileFilename", !"/tmp{{.*}}memprof.profraw"} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 0949f96 - [MemProf] Pass down memory profile name with optional path from clang
Author: Teresa Johnson Date: 2020-11-01T17:38:23-08:00 New Revision: 0949f96dc6521be80ebb8ebc1e1c506165c22aac URL: https://github.com/llvm/llvm-project/commit/0949f96dc6521be80ebb8ebc1e1c506165c22aac DIFF: https://github.com/llvm/llvm-project/commit/0949f96dc6521be80ebb8ebc1e1c506165c22aac.diff LOG: [MemProf] Pass down memory profile name with optional path from clang Similar to -fprofile-generate=, add -fmemory-profile= which takes a directory path. This is passed down to LLVM via a new module flag metadata. LLVM in turn provides this name to the runtime via the new __memprof_profile_filename variable. Additionally, always pass a default filename (in $cwd if a directory name is not specified vi the = form of the option). This is also consistent with the behavior of the PGO instrumentation. Since the memory profiles will generally be fairly large, it doesn't make sense to dump them to stderr. Also, importantly, the memory profiles will eventually be dumped in a compact binary format, which is another reason why it does not make sense to send these to stderr by default. Change the existing memprof tests to specify log_path=stderr when that was being relied on. Depends on D89086. Differential Revision: https://reviews.llvm.org/D89087 Added: clang/test/CodeGen/memory-profile-filename.c llvm/test/Instrumentation/HeapProfiler/filename.ll Modified: clang/include/clang/Basic/CodeGenOptions.def clang/include/clang/Basic/CodeGenOptions.h clang/include/clang/Driver/Options.td clang/lib/CodeGen/BackendUtil.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Driver/SanitizerArgs.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/Driver/fmemprof.cpp compiler-rt/test/memprof/TestCases/atexit_stats.cpp compiler-rt/test/memprof/TestCases/dump_process_map.cpp compiler-rt/test/memprof/TestCases/log_path_test.cpp compiler-rt/test/memprof/TestCases/malloc-size-too-big.cpp compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp compiler-rt/test/memprof/TestCases/print_miss_rate.cpp compiler-rt/test/memprof/TestCases/stress_dtls.c compiler-rt/test/memprof/TestCases/test_malloc_load_store.c compiler-rt/test/memprof/TestCases/test_memintrin.cpp compiler-rt/test/memprof/TestCases/test_new_load_store.cpp compiler-rt/test/memprof/TestCases/test_terse.cpp compiler-rt/test/memprof/TestCases/unaligned_loads_and_stores.cpp llvm/lib/Transforms/Instrumentation/MemProfiler.cpp Removed: diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index f5222b50fc7b..7cd80aa806db 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -151,7 +151,6 @@ CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions, 1, 0) ///< Set when -fmerge-functions is enabled. -CODEGENOPT(MemProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile, 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 764d0a17cb72..6452d2bb25f6 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -231,6 +231,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Name of the profile file to use with -fprofile-sample-use. std::string SampleProfileFile; + /// Name of the profile file to use as output for with -fmemory-profile. + std::string MemoryProfileOutput; + /// Name of the profile file to use as input for -fprofile-instr-use std::string ProfileInstrumentUsePath; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0f7440896cb4..165baf06cbb2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1017,6 +1017,9 @@ def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Flags<[CC1Option]>; defm memory_profile : OptInFFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">; +def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">, +Group, Flags<[CC1Option]>, MetaVarName<"">, +HelpText<"Enable heap memory profiling and dump results into ">; // Begin sanitizer flags. These should all be core options exposed in all driver // modes. diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index
[clang] 226d80e - [MemProf] Rename HeapProfiler to MemProfiler for consistency
Author: Teresa Johnson Date: 2020-09-14T13:14:57-07:00 New Revision: 226d80ebe20e2d796af6c1bc43d9fbdfbb9d4a07 URL: https://github.com/llvm/llvm-project/commit/226d80ebe20e2d796af6c1bc43d9fbdfbb9d4a07 DIFF: https://github.com/llvm/llvm-project/commit/226d80ebe20e2d796af6c1bc43d9fbdfbb9d4a07.diff LOG: [MemProf] Rename HeapProfiler to MemProfiler for consistency This is consistent with the clang option added in 7ed8124d46f94601d5f1364becee9cee8538265e, and the comments on the runtime patch in D87120. Differential Revision: https://reviews.llvm.org/D87622 Added: llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h llvm/lib/Transforms/Instrumentation/MemProfiler.cpp Modified: clang/include/clang/Basic/CodeGenOptions.def clang/include/clang/Driver/SanitizerArgs.h clang/lib/CodeGen/BackendUtil.cpp clang/lib/Driver/SanitizerArgs.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/Driver/fmemprof.cpp llvm/include/llvm/InitializePasses.h llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassRegistry.def llvm/lib/Transforms/Instrumentation/CMakeLists.txt llvm/lib/Transforms/Instrumentation/Instrumentation.cpp llvm/test/Instrumentation/HeapProfiler/basic.ll llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll Removed: llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 740d54471051..feb4ed01f6e8 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -145,7 +145,7 @@ CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions, 1, 0) ///< Set when -fmerge-functions is enabled. -CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemory-profile is enabled. +CODEGENOPT(MemProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile, 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index 95d6bcf35c78..ac2b817be1dc 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -55,7 +55,7 @@ class SanitizerArgs { bool MinimalRuntime = false; // True if cross-dso CFI support if provided by the system (i.e. Android). bool ImplicitCfiRuntime = false; - bool NeedsHeapProfRt = false; + bool NeedsMemProfRt = false; public: /// Parses the sanitizer arguments from an argument list. @@ -63,7 +63,7 @@ class SanitizerArgs { bool needsSharedRt() const { return SharedRuntime; } - bool needsHeapProfRt() const { return NeedsHeapProfRt; } + bool needsMemProfRt() const { return NeedsMemProfRt; } bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); } bool needsHwasanRt() const { return Sanitizers.has(SanitizerKind::HWAddress); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 258f5fe69ff8..472d86ea2e36 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -67,8 +67,8 @@ #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" -#include "llvm/Transforms/Instrumentation/HeapProfiler.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" @@ -268,10 +268,10 @@ static bool asanUseGlobalsGC(const Triple , const CodeGenOptions ) { return false; } -static void addHeapProfilerPasses(const PassManagerBuilder , - legacy::PassManagerBase ) { - PM.add(createHeapProfilerFunctionPass()); - PM.add(createModuleHeapProfilerLegacyPassPass()); +static void addMemProfilerPasses(const PassManagerBuilder , + legacy::PassManagerBase ) { + PM.add(createMemProfilerFunctionPass()); +