Author: Hongtao Yu Date: 2021-09-22T09:09:48-07:00 New Revision: d9b511d8e8c43f79e0e277be287656693dd6563f
URL: https://github.com/llvm/llvm-project/commit/d9b511d8e8c43f79e0e277be287656693dd6563f DIFF: https://github.com/llvm/llvm-project/commit/d9b511d8e8c43f79e0e277be287656693dd6563f.diff LOG: [CSSPGO] Set PseudoProbeInserter as a default pass. Currently PseudoProbeInserter is a pass conditioned on a target switch. It works well with a single clang invocation. It doesn't work so well when the backend is called separately (i.e., through the linker or llc), where the user always has to pass -pseudo-probe-for-profiling explicitly. I'm making the pass a default pass that requires no command line arg to trigger, but will actually be run depending on whether the CU comes with `llvm.pseudo_probe_desc` metadata. Reviewed By: wenlei Differential Revision: https://reviews.llvm.org/D110209 Added: Modified: clang/lib/CodeGen/BackendUtil.cpp lld/ELF/Config.h lld/ELF/Driver.cpp lld/ELF/LTO.cpp lld/ELF/Options.td lld/test/ELF/lto/pseudo-probe-lto.ll llvm/include/llvm/CodeGen/CommandFlags.h llvm/include/llvm/Target/TargetOptions.h llvm/lib/CodeGen/CommandFlags.cpp llvm/lib/CodeGen/PseudoProbeInserter.cpp llvm/lib/CodeGen/TargetPassConfig.cpp llvm/lib/Target/X86/X86TargetMachine.cpp llvm/test/CodeGen/X86/O0-pipeline.ll llvm/test/CodeGen/X86/opt-pipeline.ll llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test Removed: ################################################################################ diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e31fa3f9f94de..99e33b227f792 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -576,7 +576,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; 
Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI; - Options.PseudoProbeForProfiling = CodeGenOpts.PseudoProbeForProfiling; Options.ValueTrackingVariableLocations = CodeGenOpts.ValueTrackingVariableLocations; Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f9851d03e78bf..65101d29136e2 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -183,7 +183,6 @@ struct Configuration { bool ltoDebugPassManager; bool ltoEmitAsm; bool ltoNewPassManager; - bool ltoPseudoProbeForProfiling; bool ltoUniqueBasicBlockSectionNames; bool ltoWholeProgramVisibility; bool mergeArmExidx; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 6607c0fe15a4b..8cb81987163fc 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1084,8 +1084,6 @@ static void readConfigs(opt::InputArgList &args) { config->ltoo = args::getInteger(args, OPT_lto_O, 2); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); - config->ltoPseudoProbeForProfiling = - args.hasArg(OPT_lto_pseudo_probe_for_profiling); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); config->ltoBasicBlockSections = args.getLastArgValue(OPT_lto_basic_block_sections); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 1f60e1e8a395c..fb354f81d49d6 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -112,7 +112,6 @@ static lto::Config createConfig() { } } - c.Options.PseudoProbeForProfiling = config->ltoPseudoProbeForProfiling; c.Options.UniqueBasicBlockSectionNames = config->ltoUniqueBasicBlockSectionNames; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 874399d5f41f2..852a27d62812b 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -574,8 +574,6 @@ def lto_sample_profile: JJ<"lto-sample-profile=">, defm lto_whole_program_visibility: 
BB<"lto-whole-program-visibility", "Asserts that the LTO link has whole program visibility", "Asserts that the LTO link does not have whole program visibility">; -def lto_pseudo_probe_for_profiling: F<"lto-pseudo-probe-for-profiling">, - HelpText<"Emit pseudo probes for sample profiling">; def disable_verify: F<"disable-verify">; defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, @@ -651,8 +649,6 @@ def: F<"plugin-opt=opt-remarks-with-hotness">, def: J<"plugin-opt=opt-remarks-hotness-threshold=">, Alias<opt_remarks_hotness_threshold>, HelpText<"Alias for --opt-remarks-hotness-threshold">; -def: J<"plugin-opt=pseudo-probe-for-profiling">, - Alias<lto_pseudo_probe_for_profiling>, HelpText<"Alias for --lto-pseudo-probe-for-profiling">; def: J<"plugin-opt=sample-profile=">, Alias<lto_sample_profile>, HelpText<"Alias for --lto-sample-profile">; def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for --save-temps">; diff --git a/lld/test/ELF/lto/pseudo-probe-lto.ll b/lld/test/ELF/lto/pseudo-probe-lto.ll index ae71876087fc2..925566b33e1d9 100644 --- a/lld/test/ELF/lto/pseudo-probe-lto.ll +++ b/lld/test/ELF/lto/pseudo-probe-lto.ll @@ -1,7 +1,6 @@ ; REQUIRES: x86 ; RUN: opt < %s -passes=pseudo-probe -function-sections -o %t.o -; RUN: ld.lld %t.o -shared --lto-pseudo-probe-for-profiling --lto-emit-asm -o - | FileCheck %s -; RUN: ld.lld %t.o -shared -plugin-opt=pseudo-probe-for-profiling --lto-emit-asm -o - | FileCheck %s +; RUN: ld.lld %t.o -shared --lto-emit-asm -o - | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-scei-ps4" diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index ab665eea99680..ed3cd54df2727 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -129,8 +129,6 @@ 
bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); -bool getPseudoProbeForProfiling(); - bool getValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 7f57108199cf2..11f9296f900ca 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -140,7 +140,7 @@ namespace llvm { EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), - PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false), + ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} @@ -321,9 +321,6 @@ namespace llvm { /// production. bool ShouldEmitDebugEntryValues() const; - /// Emit pseudo probes into the binary for sample profiling - unsigned PseudoProbeForProfiling : 1; - // When set to true, use experimental new debug variable location tracking, // which seeks to follow the values of variables rather than their location, // post isel. 
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 8f3163004e0e5..a1ff02178ffae 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -90,7 +90,6 @@ CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) -CGOPT(bool, PseudoProbeForProfiling) CGOPT(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) @@ -434,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableDebugEntryValues); - static cl::opt<bool> PseudoProbeForProfiling( - "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"), - cl::init(false)); - CGBINDOPT(PseudoProbeForProfiling); - static cl::opt<bool> ValueTrackingVariableLocations( "experimental-debug-variable-locations", cl::desc("Use experimental new value-tracking variable locations"), @@ -540,7 +534,6 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); - Options.PseudoProbeForProfiling = getPseudoProbeForProfiling(); Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp index a9fb577d57351..5f69f91941253 100644 --- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -44,7 +44,14 @@ class PseudoProbeInserter : public MachineFunctionPass { MachineFunctionPass::getAnalysisUsage(AU); } + bool doInitialization(Module &M) override { + ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName); + return false; + } + bool 
runOnMachineFunction(MachineFunction &MF) override { + if (!ShouldRun) + return false; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); bool Changed = false; for (MachineBasicBlock &MBB : MF) { @@ -129,6 +136,8 @@ class PseudoProbeInserter : public MachineFunctionPass { Name = SP->getName(); return Function::getGUID(Name); } + + bool ShouldRun = false; }; } // namespace diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 2a90c3154bb41..f6d6cbf1022fe 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1276,10 +1276,6 @@ void TargetPassConfig::addMachinePasses() { // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); - // Insert pseudo probe annotation for callsite profiling - if (TM->Options.PseudoProbeForProfiling) - addPass(createPseudoProbeInserter()); - AddingMachinePasses = false; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 48d87409a88d9..9e9aa0dc5ddb2 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -585,6 +585,9 @@ void X86PassConfig::addPreEmitPass2() { addPass(createEHContGuardCatchretPass()); } addPass(createX86LoadValueInjectionRetHardeningPass()); + + // Insert pseudo probe annotation for callsite profiling + addPass(createPseudoProbeInserter()); } bool X86PassConfig::addPostFastRegAllocRewrite() { diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index f7999097d71a4..54eecb113540f 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -72,7 +72,8 @@ ; CHECK-NEXT: X86 Speculative Execution Side Effect Suppression ; CHECK-NEXT: X86 Indirect Thunks ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed -; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening +; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening 
+; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: X86 Assembly Printer diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 6f634fd536ab7..81493a9be856d 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -200,6 +200,7 @@ ; CHECK-NEXT: X86 Indirect Thunks ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed ; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening +; CHECK-NEXT: Pseudo Probe Inserter ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: X86 Assembly Printer diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll index 707977f68957e..4647a34fc2f62 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll @@ -1,9 +1,9 @@ ; REQUIRES: x86_64-linux ; RUN: opt < %s -passes='pseudo-probe,jump-threading' -S -o %t ; RUN: FileCheck %s < %t --check-prefix=JT -; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm | FileCheck %s --check-prefix=ASM +; RUN: llc -function-sections <%t -filetype=asm | FileCheck %s --check-prefix=ASM ; RUN: opt < %s -passes='pseudo-probe' -S -o %t1 -; RUN: llc -pseudo-probe-for-profiling -stop-after=tailduplication <%t1 | FileCheck %s --check-prefix=MIR-tail +; RUN: llc -stop-after=tailduplication <%t1 | FileCheck %s --check-prefix=MIR-tail ; RUN: opt < %s -passes='pseudo-probe,simplifycfg' -S | FileCheck %s --check-prefix=SC declare i32 @f1() diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll index 4f730ba09a3ae..53a122653e713 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ 
b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -1,9 +1,9 @@ ; REQUIRES: x86_64-linux ; RUN: opt < %s -passes='pseudo-probe,cgscc(inline)' -function-sections -mtriple=x86_64-unknown-linux-gnu -S -o %t ; RUN: FileCheck %s < %t --check-prefix=CHECK-IL -; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm -o %t1 +; RUN: llc -function-sections <%t -filetype=asm -o %t1 ; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM -; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=obj -o %t2 +; RUN: llc -function-sections <%t -filetype=obj -o %t2 ; RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llvm-mc -filetype=asm <%t1 -o %t3 ; RUN: FileCheck %s < %t3 --check-prefix=CHECK-ASM diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index 55cf453997e77..46511318ef316 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -1,10 +1,10 @@ ; REQUIRES: x86_64-linux ; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o %t ; RUN: FileCheck %s < %t --check-prefix=CHECK-IL -; RUN: llc %t -pseudo-probe-for-profiling -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR -; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=asm -o %t1 +; RUN: llc %t -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR +; RUN: llc %t -function-sections -filetype=asm -o %t1 ; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM -; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=obj -o %t2 +; RUN: llc %t -function-sections -filetype=obj -o %t2 ; RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llvm-mc %t1 -filetype=obj -o %t3 ; RUN: llvm-objdump --section-headers %t3 | FileCheck %s --check-prefix=CHECK-OBJ diff --git 
a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll index 9d89cad43aa79..569ab3522b141 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- -pseudo-probe-for-profiling -O3 | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- -O3 | FileCheck %s define float @foo(float %x) #0 { %tmp1 = fmul float %x, 3.000000e+00 diff --git a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test index b46479e10e5af..43ce080c7f92c 100644 --- a/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/truncated-pseudoprobe.test @@ -20,5 +20,5 @@ ; CHECK-NEXT: !Attributes: 1 ; truncated-pseudoprobe.perfbin is from the following compile commands: -; llc -pseudo-probe-for-profiling truncated-pseudoprobe.ll -filetype=obj -o truncated-pseudoprobe.o +; llc truncated-pseudoprobe.ll -filetype=obj -o truncated-pseudoprobe.o ; clang truncated-pseudoprobe.o -o truncated-pseudoprobe.perfbin _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits