hoy created this revision. Herald added subscribers: modimo, wenlei, hiraditya. hoy requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits.
Pseudo probe instrumentation was missing from O0 builds. It is needed in cases where some source files are built in O0 while the others are built in optimized mode. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D109638 Files: clang/test/CodeGen/pseudo-probe-emit.c llvm/lib/Passes/PassBuilder.cpp llvm/tools/llvm-profgen/PerfReader.cpp Index: llvm/tools/llvm-profgen/PerfReader.cpp =================================================================== --- llvm/tools/llvm-profgen/PerfReader.cpp +++ llvm/tools/llvm-profgen/PerfReader.cpp @@ -7,15 +7,23 @@ //===----------------------------------------------------------------------===// #include "PerfReader.h" #include "ProfileGenerator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" +#define DEBUG_TYPE "perf-reader" + +STATISTIC(NumStackSamplesWithInvalidReturnAddress, + "Number of stack samples with an invalid return address"); + +STATISTIC(NumStackSamples, "Number of stack samples"); + static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Print binary load events.")); cl::opt<bool> SkipSymbolization("skip-symbolization", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, - cl::desc("Dump the unsumbolized profile to the " + cl::desc("Dump the unsymbolized profile to the " "output file. It will show unwinder " "output for CS profile generation.")); @@ -510,15 +518,24 @@ if (!Binary->addressIsCode(FrameAddr)) break; - // We need to translate return address to call address - // for non-leaf frames + // We need to translate return address to call address for non-leaf frames. if (!CallStack.empty()) { - FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); + auto I = Binary->getIndexForAddr(FrameAddr); + FrameAddr = I ? Binary->getAddressforIndex(I - 1) : 0; + // Stop at an invalid return address caused by bad unwinding. 
This could + // happen to frame-pointer-based unwinding and the callee functions that + // do not have the frame pointer chain set up. + if (!FrameAddr || !Binary->addressIsCall(FrameAddr)) { + NumStackSamplesWithInvalidReturnAddress++; + break; + } } CallStack.emplace_back(FrameAddr); } + NumStackSamples++; + // Skip other unrelated line, find the next valid LBR line // Note that even for empty call stack, we should skip the address at the // bottom, otherwise the following pass may generate a truncated callstack Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1924,6 +1924,13 @@ ModulePassManager MPM; + // Perform pseudo probe instrumentation in O0 mode. This is for the + // consistency between different build modes. For example, a LTO build can be + // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in + // the postlink will require pseudo probe instrumentation in the prelink. + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(SampleProfileProbePass(TM)); + if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || PGOOpt->Action == PGOOptions::IRUse)) addPGOInstrPassesForO0( Index: clang/test/CodeGen/pseudo-probe-emit.c =================================================================== --- clang/test/CodeGen/pseudo-probe-emit.c +++ clang/test/CodeGen/pseudo-probe-emit.c @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 -O0 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -O2 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // Check the generation of pseudoprobe intrinsic call
Index: llvm/tools/llvm-profgen/PerfReader.cpp =================================================================== --- llvm/tools/llvm-profgen/PerfReader.cpp +++ llvm/tools/llvm-profgen/PerfReader.cpp @@ -7,15 +7,23 @@ //===----------------------------------------------------------------------===// #include "PerfReader.h" #include "ProfileGenerator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" +#define DEBUG_TYPE "perf-reader" + +STATISTIC(NumStackSamplesWithInvalidReturnAddress, + "Number of stack samples with an invalid return address"); + +STATISTIC(NumStackSamples, "Number of stack samples"); + static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Print binary load events.")); cl::opt<bool> SkipSymbolization("skip-symbolization", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, - cl::desc("Dump the unsumbolized profile to the " + cl::desc("Dump the unsymbolized profile to the " "output file. It will show unwinder " "output for CS profile generation.")); @@ -510,15 +518,24 @@ if (!Binary->addressIsCode(FrameAddr)) break; - // We need to translate return address to call address - // for non-leaf frames + // We need to translate return address to call address for non-leaf frames. if (!CallStack.empty()) { - FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); + auto I = Binary->getIndexForAddr(FrameAddr); + FrameAddr = I ? Binary->getAddressforIndex(I - 1) : 0; + // Stop at an invalid return address caused by bad unwinding. This could + // happen to frame-pointer-based unwinding and the callee functions that + // do not have the frame pointer chain set up. 
+ if (!FrameAddr || !Binary->addressIsCall(FrameAddr)) { + NumStackSamplesWithInvalidReturnAddress++; + break; + } } CallStack.emplace_back(FrameAddr); } + NumStackSamples++; + // Skip other unrelated line, find the next valid LBR line // Note that even for empty call stack, we should skip the address at the // bottom, otherwise the following pass may generate a truncated callstack Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1924,6 +1924,13 @@ ModulePassManager MPM; + // Perform pseudo probe instrumentation in O0 mode. This is for the + // consistency between different build modes. For example, a LTO build can be + // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in + // the postlink will require pseudo probe instrumentation in the prelink. + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(SampleProfileProbePass(TM)); + if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || PGOOpt->Action == PGOOptions::IRUse)) addPGOInstrPassesForO0( Index: clang/test/CodeGen/pseudo-probe-emit.c =================================================================== --- clang/test/CodeGen/pseudo-probe-emit.c +++ clang/test/CodeGen/pseudo-probe-emit.c @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 -O0 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -O2 -fno-legacy-pass-manager -fpseudo-probe-for-profiling -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s // Check the generation of pseudoprobe intrinsic call
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits