Hi Serhei, I've done another round of review of this eu-stackprof patch set. My comments from the first round still apply, and I've included some more below.
On Mon, Apr 13, 2026 at 5:31 PM Serhei Makarov <[email protected]> wrote: > > eu-stackprof is a new tool which profiles processes on a Linux system > using perf_events and outputs gprof gmon.out format program counter > histograms and callgraph-arc profiles; intended as an updated demo of > libdwfl_stacktrace functionality and as a data-gathering tool for the > profiledb initiative. > > * configure.ac: Add configure checks for C++20, eu-stackprof > perf/libpfm dependencies. > * src/Makefile.am (bin_PROGRAMS): Add stackprof. > (stackprof_*): Add stackprof SOURCES, LDADD, and so forth. > * src/stackprof.cxx: New file. NEWS should be updated too. > > Co-authored-by: <[email protected]> > Signed-off-by: <[email protected]> > --- > configure.ac | 23 +- > src/Makefile.am | 11 +- > src/stackprof.cxx | 2083 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 2110 insertions(+), 7 deletions(-) > create mode 100644 src/stackprof.cxx > > diff --git a/configure.ac b/configure.ac > index f22a3f90..e5be95b8 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -888,10 +888,21 @@ fi > AC_CHECK_PROG(HAVE_ZSTD, zstd, yes, no) > AM_CONDITIONAL([HAVE_ZSTD],[test "x$HAVE_ZSTD" = "xyes"]) > > -# For tests that need to use C++11 > -AX_CXX_COMPILE_STDCXX(11, noext, optional) > -AS_IF([test "x$HAVE_CXX11" = "x1"], [HAVE_CXX11=yes], [HAVE_CXX11=no]) > -AM_CONDITIONAL([HAVE_CXX11],[test "x$HAVE_CXX11" = "xyes"]) > +# For tests that need to use C++20 > +AX_CXX_COMPILE_STDCXX(20, noext, optional) > +AS_IF([test "x$HAVE_CXX20" = "x1"], [HAVE_CXX20=yes], [HAVE_CXX20=no]) > +AM_CONDITIONAL([HAVE_CXX20],[test "x$HAVE_CXX20" = "xyes"]) > + > + > +# For eu-stackprof > +# optional: > +AC_CHECK_HEADERS([perfmon/pfmlib_perf_event.h]) > +AM_CONDITIONAL([HAVE_LIBPFM], [test > "x${ac_cv_header_perfmon_pfmlib_perf_event_h}" = "xyes" ]) > +AC_CHECK_LIB(pfm, pfm_get_os_event_encoding, [AC_SUBST(libpfm_LIBS, > '-lpfm')]) > +# required: > +AC_CHECK_HEADERS([linux/perf_event.h]) > 
+AM_CONDITIONAL([ENABLE_STACKPROF],[test > "x${ac_cv_header_linux_perf_event_h}x${HAVE_CXX20}" = "xyesxyes" ]) > + > > AC_CHECK_HEADERS([execinfo.h]) > > @@ -941,7 +952,7 @@ AS_IF([test "x$with_libarchive" = "xyes" -a > "x$have_libarchive" != "xyes"], [ > # pronounce judgement on ability to build server, overridden by =yes/=no > if test "x$enable_debuginfod" = "xno"; then > true > -elif test "x$have_jsonc$HAVE_CXX11$have_libarchive$have_sqlite3" = > "xyesyesyesyes"; then > +elif test "x$have_jsonc$HAVE_CXX20$have_libarchive$have_sqlite3" = > "xyesyesyesyes"; then > enable_debuginfod=yes > elif test "x$enable_debuginfod" = "xyes"; then > AC_MSG_ERROR([unable to build debuginfod, missing libmicrohttpd, sqlite3 > or libarchive]) > @@ -1098,7 +1109,7 @@ AC_MSG_NOTICE([ > EXTRA TEST FEATURES (used with make check) > have bunzip2 installed (required) : ${HAVE_BUNZIP2} > have zstd installed : ${HAVE_ZSTD} > - C++11 : ${HAVE_CXX11} > + C++20 : ${HAVE_CXX20} > debug branch prediction : ${use_debugpred} > gprof support : ${use_gprof} > gcov support : ${use_gcov} > diff --git a/src/Makefile.am b/src/Makefile.am > index f041d458..f753c70c 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -35,6 +35,9 @@ bin_PROGRAMS = readelf nm size strip elflint findtextrel > addr2line \ > if ENABLE_STACKTRACE > bin_PROGRAMS += stacktrace > endif > +if ENABLE_STACKPROF > +bin_PROGRAMS += stackprof > +endif > > noinst_LIBRARIES = libar.a > > @@ -127,7 +130,13 @@ endif > elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) > elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD) > srcfiles_SOURCES = srcfiles.cxx > -srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD) > $(libarchive_LIBS) $(libdebuginfod) > +srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD) > $(libarchive_LIBS) $(libdebuginfod) > +if ENABLE_STACKPROF > +stackprof_SOURCES = stackprof.cxx > +stackprof_CPPFLAGS = $(AM_CPPFLAGS) $(jsonc_CXXFLAGS) > +stackprof_CXXFLAGS = -Wall > 
+stackprof_LDADD = $(libebl) $(libdw) $(libelf) $(libeu) $(argp_LDADD) > $(libpfm_LIBS) $(jsonc_LIBS) > +endif > > installcheck-binPROGRAMS: $(bin_PROGRAMS) > bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \ > diff --git a/src/stackprof.cxx b/src/stackprof.cxx > new file mode 100644 > index 00000000..33720e80 > --- /dev/null > +++ b/src/stackprof.cxx > @@ -0,0 +1,2083 @@ > +/* Collect stack-trace profiles of running program(s). > + Copyright (C) 2025-2026 Red Hat, Inc. > + This file is part of elfutils. > + > + This file is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + elfutils is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ > + > +#ifdef HAVE_CONFIG_H > +# include <config.h> > +#endif > + > +#include "printversion.h" > + > +#include <string> > +#include <memory> > +#include <iomanip> > +#include <map> > +#include <unordered_map> > +#include <vector> > +#include <bitset> > +#include <stdexcept> > +#include <cstring> > +#include <csignal> > +#include <cassert> > +#include <chrono> > +#include <iostream> > +#include <fstream> > +#include <sstream> > +#include <cinttypes> > +#include <format> > +#include <filesystem> > + > +#include <sys/utsname.h> > + > +#include <sys/syscall.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/wait.h> > +#include <poll.h> > +#ifdef HAVE_LINUX_PERF_EVENT_H > +#include <linux/perf_event.h> > +#endif > +#include <argp.h> > +#include <fcntl.h> > +#include <dirent.h> > + > +#include <system.h> > + > +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H > +#include <perfmon/pfmlib_perf_event.h> > +#endif > + > +#include <json-c/json.h> > + > +#include <gelf.h> > +#include <dwarf.h> > +#include <libdwfl.h> > +#include <libdw.h> > +#include "../libebl/libebl.h" > +#include "../libdwfl_stacktrace/libdwfl_stacktrace.h" > + > +using namespace std; // so we don't have to std:: prefix everything in here > + > +//////////////////////////////////////////////////////////////////////// > +// find_debuginfo callbacks > + > +#ifdef FIND_DEBUGINFO FIND_DEBUGINFO isn't defined anywhere. If the intention is to flesh this out in a future patch I'd prefer we just add handling for FIND_DEBUGINFO then. 
> + > +static char *debuginfo_path = NULL; > + > +static const Dwfl_Callbacks dwfl_cfi_callbacks = > + { > + .find_elf = dwflst_tracker_linux_proc_find_elf, > + .find_debuginfo = dwfl_standard_find_debuginfo, > + .debuginfo_path = &debuginfo_path, > + }; > + > +#else > + > +int > +nop_find_debuginfo (Dwfl_Module *mod __attribute__((unused)), > + void **userdata __attribute__((unused)), > + const char *modname __attribute__((unused)), > + GElf_Addr base __attribute__((unused)), > + const char *file_name __attribute__((unused)), > + const char *debuglink_file __attribute__((unused)), > + GElf_Word debuglink_crc __attribute__((unused)), > + char **debuginfo_file_name __attribute__((unused))) > +{ > +#ifdef DEBUG_MODULES > + cerr << format("nop_find_debuginfo: modname={} file_name={} > debuglink_file={}\n", modname, file_name, debuglink_file); > +#endif > + return -1; > +} > + > +static const Dwfl_Callbacks dwfl_cfi_callbacks = > +{ > + .find_elf = dwflst_tracker_linux_proc_find_elf, > + .find_debuginfo = nop_find_debuginfo, /* work with CFI only */ > +}; > + > +#endif /* FIND_DEBUGINFO */ > + > + > +//////////////////////////////////////////////////////////////////////// > +// class decls > + > +// Unwind statistics for a Dwfl and associated process. > +struct UnwindDwflStats { > + Dwfl *dwfl; > + string comm; > + int max_frames; /* for diagnostic purposes */ > + int total_samples; /* for diagnostic purposes */ > + int lost_samples; /* for diagnostic purposes */ > + int shown_errors; /* for diagnostic purposes */ > + Dwfl_Unwound_Source last_unwound; /* track CFI source, for diagnostic > purposes */ > + Dwfl_Unwound_Source worst_unwound; /* track CFI source, for diagnostic > purposes */ > +}; > + > +struct hash_arc { > + template <class T1, class T2> > + size_t operator()(const pair<T1, T2> &p) const { > + return hash<T1>()(p.first) ^ hash<T2>()(p.second); > + } > +}; > + > +// Unwind statistics for a single module identified by build-id. 
> +struct UnwindModuleStats { > + map<uint64_t, uint32_t> histogram; /* sorted by pc */ > + unordered_map<pair<uint64_t, uint64_t>, uint32_t, hash_arc> callgraph; > + > + void record_pc(Dwarf_Addr pc) { > + if (histogram.count(pc) == 0) > + histogram[pc]=1; > + else > + histogram[pc]++; > + } > + void record_callgraph_arc(Dwarf_Addr from, Dwarf_Addr to) { > + pair<uint64_t, uint64_t> arc(from, to); > + if (callgraph.count(arc) == 0) > + callgraph[arc]=1; > + else > + callgraph[arc]++; > + } > +}; > + > +struct UnwindStatsTable > +{ > + unordered_map<pid_t, UnwindDwflStats> dwfl_tab; > + unordered_map<string, UnwindModuleStats> buildid_tab; > + typedef map<string, UnwindModuleStats> buildid_map_t; > + > + UnwindStatsTable () {} > + ~UnwindStatsTable () {} > + > + UnwindDwflStats *pid_find_or_create(pid_t pid); > + string pid_find_comm(pid_t pid); > + Dwfl *pid_find_dwfl(pid_t pid); > + void pid_store_dwfl(pid_t pid, Dwfl *dwfl); > + > + UnwindModuleStats *buildid_find(string buildid); > + UnwindModuleStats *buildid_find_or_create(string buildid, Dwfl_Module > *mod); > + > + void print_summary() const; > +}; > + > +class PerfConsumer; > + > +// A PerfReader creates perf_events file descriptors, monitors the > +// mmap'd ring buffers for events, and dispatches decoded forms to a > +// PerfConsumer. > +class PerfReader > +{ > +private: > + /* Sized by number of CPUs or threads: */ > + vector<int> perf_fds; > + vector<perf_event_mmap_page *> perf_headers; > + vector<pollfd> pollfds; > + > + PerfConsumer* consumer; // pluralize! 
> + Ebl* default_ebl; > + uint64_t sample_regs_user; > + int sample_regs_count; > + bool enabled; > + int page_size; > + int page_count; > + int mmap_size; > + vector<uint8_t> event_wraparound_temp; // for events straddling ring > buffer end > + > + void decode_event(const perf_event_header* ehdr); > + > +public: > + // PerfReader(perf_event_attr* attr, int pid, PerfConsumer* consumer); // > attach to process hierarchy; may modify *attr > + PerfReader(perf_event_attr* attr, PerfConsumer* consumer, int pid=-1); > // systemwide; may modify *attr > + > + ~PerfReader(); > + > + void process_some(); // run briefly, relay decoded perf_events to consumer > + uint64_t regs_mask() { return this->sample_regs_user; } > + Ebl *ebl() { return this->default_ebl; } > +}; > + > +// A PerfConsumer receives both raw and decoded (fields split out into > function parameters) > +// perf event records from a PerfReader. Pure interface. > +class PerfConsumer > +{ > +protected: > + PerfReader *reader; /* access sample_regs_user etc. 
metadata */ > + > +public: > + PerfConsumer() {} > + PerfConsumer(PerfReader *reader) : reader(reader) {} > + void set_reader(PerfReader *reader) { this->reader = reader; } > + > + virtual ~PerfConsumer() {} > + virtual void process(const perf_event_header* sample) {} > + > + virtual void process_comm(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, bool exec, const > string& comm) {} > + virtual void process_exit(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) {} > + virtual void process_fork(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) {} > + virtual void process_sample(const perf_event_header* sample, > + uint64_t ip, > + uint32_t pid, uint32_t tid, > + uint64_t time, > + uint64_t abi, > + uint32_t nregs, const uint64_t *regs, > + uint64_t data_size, const uint8_t *data) {} > + virtual void process_mmap2(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, > + uint64_t addr, uint64_t len, uint64_t pgoff, > + uint8_t build_id_size, const uint8_t *build_id, > + const char *filename) {} > +}; > + > +// A StatsPerfConsumer is a toy concrete object that accepts decoded > +// perf events and logs and records basic stats about them. 
> +class StatsPerfConsumer: public PerfConsumer > +{ > + unordered_map<int,unsigned> event_type_counts; > + > +public: > + StatsPerfConsumer() {} > + ~StatsPerfConsumer(); // report to stdout > + void process_comm(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, bool exec, const string& > comm); > + void process_exit(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid); > + void process_fork(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid); > + void process_sample(const perf_event_header* sample, > + uint64_t ip, > + uint32_t pid, uint32_t tid, > + uint64_t time, > + uint64_t abi, > + uint32_t nregs, const uint64_t *regs, > + uint64_t data_size, const uint8_t *data); > + void process_mmap2(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, > + uint64_t addr, uint64_t len, uint64_t pgoff, > + uint8_t build_id_size, const uint8_t *build_id, > + const char *filename); > + void process(const perf_event_header* sample); > +}; > + > +// An UnwindSample records an unwound call stack from a perf-event > +// sample. > +struct UnwindSample > +{ > + const perf_event_header *event; > + Dwfl *dwfl; > + uint32_t pid, tid; > + vector<Dwarf_Addr> addrs; > + int elfclass; > + > + Dwarf_Addr base; /* for diagnostic purposes */ > + Dwarf_Addr sp; /* for diagnostic purposes */ > +}; > + > +class UnwindSampleConsumer; > + > +// A PerfConsumerUnwinder accepts decoded perf events, and produces > +// UnwindSample objects from them for relaying to an > +// UnwindSampleConsumer. > +class PerfConsumerUnwinder: public PerfConsumer > +{ > + UnwindSampleConsumer *consumer; > + UnwindSample last_us; // XXX: why & is this safe to hang onto? 
> + Dwflst_Process_Tracker *tracker; > + UnwindStatsTable *stats; > + unsigned maxframes; > + > + int find_procfile(Dwfl *dwfl, pid_t *pid, Elf **elf, int *elf_fd); > + Dwfl *find_dwfl(pid_t pid, const uint64_t *regs, uint32_t nregs, > + Elf **elf, bool *cached); > + > + int get_sp_reg(bool is_abi32); > + > +public: > + PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust); > + PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust, > PerfReader *reader); > + ~PerfConsumerUnwinder(); > + > + /* libdwfl{st} callbacks */ > + Dwfl *init_dwfl(pid_t pid); > + int unwind_frame_cb(Dwfl_Frame *state); > + > + void process_comm(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, bool exec, const string& > comm); > + void process_exit(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid); > + void process_fork(const perf_event_header* sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid); > + void process_sample(const perf_event_header* sample, > + uint64_t ip, > + uint32_t pid, uint32_t tid, > + uint64_t time, > + uint64_t abi, > + uint32_t nregs, const uint64_t *regs, > + uint64_t data_size, const uint8_t *data); > + void process_mmap2(const perf_event_header* sample, > + uint32_t pid, uint32_t tid, > + uint64_t addr, uint64_t len, uint64_t pgoff, > + uint8_t build_id_size, const uint8_t *build_id, > + const char *filename); > +}; > + > +// An UnwindSampleConsumer receives an UnwindSample from a > PerfConsumerUnwinder. > +// Pure abstract. > +class UnwindSampleConsumer > +{ > +public: > + UnwindSampleConsumer() {} > + virtual ~UnwindSampleConsumer() {} > + virtual void process(const UnwindSample* sample) = 0; > + virtual int maxframes() = 0; > +}; > + > + > +// An UnwindStatsConsumer is a toy that just collects statistics about > +// a received stream of UnwindSamples. 
> +class UnwindStatsConsumer: public UnwindSampleConsumer > +{ > + UnwindStatsTable *stats; > + > +public: > + UnwindStatsConsumer(UnwindStatsTable *usc) : stats(usc) {} > + ~UnwindStatsConsumer(); > + void process(const UnwindSample* sample); > + int maxframes(); > +}; > + > + > +// An GprofUnwindSampleConsumer instance consumes UnwindSamples and tabulates > +// them by buildid, for eventual writing out into gmon.out format files. > +class GprofUnwindSampleConsumer: public UnwindSampleConsumer > +{ > + UnwindStatsTable *stats; > + unordered_map<string, string> buildid_to_mainfile; > + unordered_map<string, string> buildid_to_debugfile; > + void record_gmon_hist(ostream &of, map<uint64_t, uint32_t> &histogram, > uint64_t low_pc, uint64_t high_pc, uint64_t alignment); > + > +public: > + GprofUnwindSampleConsumer(UnwindStatsTable *usc) : stats(usc) {} > + ~GprofUnwindSampleConsumer(); // write out all the gmon.$BUILDID.out files > + void record_gmon_out(const string& buildid, UnwindModuleStats& m); // > write out one gmon.$BUILDID.out file > + void process(const UnwindSample* sample); // accumulate hits / callgraph > edges (need maxdepth=1 only) > + int maxframes(); > +}; > + > +// hypothetical: FlamegraphUnwindSampleConsumer, taking in a bigger maxdepth > +// hypothetical: PprofUnwindSampleConsumer, https://github.com/google/pprof > + > + > +//////////////////////////////////////////////////////////////////////// > +// command line parsing and main() > + > +/* Name and version of program. */ > +ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; > + > +/* Bug report address. */ > +ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; > + > +#define HIST_SPLIT_OPTS "none/even/flex" > + > +/* Definitions of arguments for argp functions. 
*/ > +static const struct argp_option options[] = > +{ > + { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 }, > + { "verbose", 'v', NULL, 0, N_ ("Increase verbosity of logging messages > (modules/samples/frames/more)."), 0 }, > + /* TODO: Add "quiet" option suppressing summary table. */ > + { "gmon", 'g', NULL, 0, N_("Generate gmon.BUILDID.out files for each > binary."), 0 }, > + { "hist-split",'G', HIST_SPLIT_OPTS, 0, N_("Histogram splitting method for > gmon, default 'even'."), 0 }, It would be helpful to very briefly describe the difference between even and flex here. > + { "maxframes", 'n', "MAXFRAMES", 0, N_("Maximum number of frames to > unwind, default 1 with --gmon, 256 otherwise."), 0 }, /* TODO */ > + { "output", 'o', "DIR", 0, N_("Output directory for gmon files."), 0 }, > + { "force", 'f', NULL, 0, N_("Unlink output files to force writing as > new."), 0 }, > + { "pid", 'p', "PID", 0, N_("Profile given PID, and its future children."), > 0 }, > +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H > + { "event", 'e', "EVENT", 0, N_("Sample given LIBPFM event > specification."), 0 }, > +#define ARGP_KEY_EVENT_LIST 0x1000 > + { "event-list", ARGP_KEY_EVENT_LIST, NULL, 0, N_("Sample given LIBPFM > event specification."), 0 }, > +#endif > + { NULL, 0, NULL, 0, NULL, 0 } > +}; > + > +static error_t parse_opt (int key, char *arg, struct argp_state *state); > +static const struct argp argp = > + { > + options, parse_opt, "[--] [CMD]...", N_("Collect systemwide stack-trace > profiles."), > + NULL, NULL, NULL > + }; > + > +// How to divide the program counter histograms in gmon output: > +enum hist_split_method { > + HIST_SPLIT_NONE = 0, /* one histogram for the entire executable */ > + HIST_SPLIT_EVEN = 1, /* all histograms the same size */ > + HIST_SPLIT_FLEX = 2, /* variable-size histograms */ > +}; > + > +// Globals set based on command line options: > +static unsigned verbose; > +static bool gmon; > +static hist_split_method gmon_hist_split = HIST_SPLIT_EVEN; > 
+static string output_dir = "."; > +static bool output_force = false; // overwrite preexisting output files? > +static int pid; > +static int opt_maxframes = -1; // set to >= 0 to override default maxframes > in consumer > +static string libpfm_event; > +static string libpfm_event_decoded; > +static perf_event_attr attr; > +static bool branch_record = false; // using accurate branch recording for > call-graph arcs rather than backtrace heuristics branch_record is never set to true. Is this intended? > + > +// Verbosity categories: > +static bool show_summary = true; /* XXX could suppress with --quiet */ > +static bool show_modules = false; /* -> first sample for each module */ > +static bool show_samples = false; /* -> every sample */ > +static bool show_frames = false; > +static bool show_debugfile = false; > +static bool show_tmi = false; /* -> perf, cfi details */ > + > +static error_t > +parse_opt (int key, char *arg, struct argp_state *state) > +{ > + (void)state; > + > + switch (key) > + { > + case ARGP_KEY_INIT: > + break; > + > + case 'v': > + verbose ++; > + break; > + > + case 'g': > + gmon = true; > + break; > + > + case 'G': > + gmon = true; /* Automatically enable gmon mode if they set a gmon > option. */ > + if (std::string_view(arg) == "none") > + gmon_hist_split = HIST_SPLIT_NONE; > + else if (std::string_view(arg) == "even") > + gmon_hist_split = HIST_SPLIT_EVEN; > + else if (std::string_view(arg) == "flex") > + gmon_hist_split = HIST_SPLIT_FLEX; > + break; > + > + case 'o': > + gmon = true; > + output_dir = arg; > + break; > + > + case 'p': > + pid = atoi(arg); We should have some error checking that pid > 0 in case arg isn't a valid pid. 
> + break; > + > + case 'n': > + opt_maxframes = atoi(arg); > + if (opt_maxframes < 0) > + { > + argp_error (state, N_("-n MAXFRAMES should be 0 or higher.")); > + return EINVAL; > + } > + break; > + > + case 'f': > + output_force = true; > + break; > + > +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H > + case 'e': > + libpfm_event = arg; > + break; > + > + case ARGP_KEY_EVENT_LIST: > + { > + pfm_pmu_info_t pinfo; > + pfm_event_info_t info; > + > + pfm_err_t rc = pfm_initialize(); > + if (rc != PFM_SUCCESS) > + { > + cerr << format("ERROR: pfm_initialized failed: {}\n", > pfm_strerror(rc)); > + exit(1); > + } > + > + memset(&pinfo, 0, sizeof(pinfo)); > + memset(&info, 0, sizeof(info)); > + pinfo.size = sizeof(pinfo); > + info.size = sizeof(info); > + > + for(int j= PFM_PMU_NONE ; j< PFM_PMU_MAX; j++) > + { > + pfm_err_t ret = pfm_get_pmu_info((pfm_pmu_t) j, &pinfo); > + if (ret != PFM_SUCCESS) > + continue; > + if (! pinfo.is_present) > + continue; > + for (int i = pinfo.first_event; i != -1; i = > pfm_get_event_next(i)) > + { > + ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, &info); > + if (ret == PFM_SUCCESS) > + clog << format("{}::{}\n", pinfo.name, info.name); > + } > + } > + } > + exit(0); > +#endif > + > + default: > + return ARGP_ERR_UNKNOWN; > + } > + return 0; > +} > + > +sig_atomic_t interrupted; > + > +void sigint_handler(int sig) > +{ > + interrupted ++; > + if (interrupted > 1) > + _exit(1); > +} > + > +int > +main (int argc, char *argv[]) > +{ > + int remaining; > + int pipefd[2] = {-1, -1}; // for CMD child process post-fork sync > + bool has_cmd = false; > + (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL); > + > + /* show_summary is true by default */ > + if (verbose > 0) show_modules = true; > + if (verbose > 1) show_samples = true; > + if (verbose > 2) show_frames = true; > + if (verbose > 3) show_debugfile = true; > + if (verbose > 4) show_tmi = true; > + > + if (pid > 0 && remaining < argc) // got a pid AND a cmd? 
reject > + { > + cerr << format("ERROR: Must not specify both -p PID and CMD\n"); > + exit(1); > + } > + > + bool systemwide = (pid == 0) || (remaining == argc); > + (void) systemwide; > + > + try > + { > + memset(&attr, 0, sizeof(attr)); > + attr.size = sizeof(attr); > + > + if (libpfm_event != "") > + { > +#if HAVE_PERFMON_PFMLIB_PERF_EVENT_H > + pfm_err_t rc = pfm_initialize(); > + if (rc != PFM_SUCCESS) > + { > + cerr << format("ERROR: pfm_initialized failed: {}\n", > pfm_strerror(rc)); > + exit(1); > + } > + char* fstr = nullptr; > + pfm_perf_encode_arg_t arg = { .attr = &attr, .fstr=&fstr, .size = > sizeof(arg) }; > + rc = pfm_get_os_event_encoding(libpfm_event.c_str(), > + PFM_PLM3, /* userspace, whether > systemwide or not */ > + PFM_OS_PERF_EVENT_EXT, &arg); > + if (rc != PFM_SUCCESS) > + { > + cerr << format("ERROR: pfm_get_os_event_encoding failed: {}\n", > pfm_strerror(rc)); > + exit(1); > + } > + if (verbose) > + { > + clog << format("libpfm expanded {} to {}\n", libpfm_event, > fstr); > + } > + libpfm_event_decoded = fstr; // overwrite > + free(fstr); > +#endif > + } > + else > + { > + // same as: -e perf::CPU-CLOCK:freq=1000 > + attr.type = PERF_TYPE_SOFTWARE; > + attr.config = PERF_COUNT_SW_CPU_CLOCK; > + attr.sample_freq = 1000; > + attr.freq = 1; > + attr.exclude_kernel = 1; > + attr.exclude_hv = 1; > + attr.exclude_guest = 1; > + } > + > + if (show_summary) > + { > + clog << format("perf_event_attr configuration type={:x} config={:x} > {}{}\n", > + attr.type, attr.config, > + (attr.freq ? "sample_freq=" : "sample_period="), > + (attr.freq ? attr.sample_freq : > attr.sample_period)); > + clog << endl; > + } > + > + if (remaining < argc) // got a CMD... suffix? 
ok start it > + { > + has_cmd = true; > + int rc = pipe (pipefd); // will use pipefd[] >= 0 as flag for > synchronization just below > + if (rc < 0) > + { > + cerr << format("ERROR: pipe failed: {}\n", strerror(errno)); > + exit(1); > + } > + > + pid = fork(); > + if (pid == 0) // in child > + { > + close (pipefd[1]); // close write end > + char dummy; > + int rc = read (pipefd[0], &dummy, 1); // block until parent is > ready > + if (rc != 1) > + { > + cerr << format("ERROR: child sync read failed: {}\n", > strerror(errno)); > + exit(1); > + } > + close (pipefd[0]); > + execvp (argv[remaining], & argv[remaining] /* not +1: child > argv[0] included! */ ); > + // notreached unless error > + cerr << format("ERROR: execvp failed: {}\n", strerror(errno)); > + exit(1); > + } > + else if (pid > 0) // in parent > + { > + close (pipefd[0]); // close read end > + // will write to pipefd[1] after perfreader sicced at child > + } > + else // error > + { > + cerr << format("ERROR: fork failed: {}\n", strerror(errno)); > + exit(1); > + } > + } > + > + // Create the perf processing pipeline as per command line options > + PerfReader *pr = nullptr; > + UnwindStatsTable *tab = nullptr; > + UnwindSampleConsumer *usc = nullptr; > + PerfConsumerUnwinder *pcu = nullptr; > + StatsPerfConsumer *spc = nullptr; > + > + if (gmon) > + { > + tab = new UnwindStatsTable(); > + usc = new GprofUnwindSampleConsumer(tab); > + pcu = new PerfConsumerUnwinder(usc, tab); > + pr = new PerfReader(&attr, pcu, pid); > + } > + else > + { > + tab = new UnwindStatsTable(); > + usc = new UnwindStatsConsumer(tab); > + pcu = new PerfConsumerUnwinder (usc, tab); > + pr = new PerfReader(&attr, pcu, pid); > +#if 0 > + spc = new StatsPerfConsumer(); > + pr = new PerfReader(&attr, spc, pid); > +#endif Do we need this #if 0 and StatsPerfConsumer in the committed code? A comment at the StatsPerfConsumer definition says it's a "toy concrete object". 
That suggests to me that it was just for manual testing during the initial development of eu-stackprof. If it's intended to remain for debug builds, I suggest defining a STACKPROF_DEBUG macro (or similar name) near the top of stackprof.cxx for easier toggling. > + } > + > + signal(SIGINT, sigint_handler); > + signal(SIGTERM, sigint_handler); > + > + if (pid > 0 && has_cmd) // need to release child CMD process? > + { > + int rc = write(pipefd[1], "x", 1); // unblock child > + assert (rc == 1); > + close(pipefd[1]); > + } > + > + if (show_summary) > + { > + clog << "Starting stack profile collection "; > + if (pid) clog << format("pid {}", pid); > + else clog << "systemwide"; > + clog << "\n"; > + } > + > + while (true) // main loop > + { > + if (interrupted) break; > + if (pid > 0) waitpid(pid, NULL, WNOHANG); // reap dead child to > allow kill(pid, 0) to signal death > + if (pid > 0 && kill(pid, 0) != 0) break; // exit if child or > targeted non-child process died > + pr->process_some(); > + } > + > + delete pr; > + delete usc; > + delete pcu; > + delete spc; > + delete tab; > + > + // reporting done in various destructors > + } > + catch (const exception& e) > + { > + cerr << format("{}\n", e.what()); > + } > + > + return 0; > +} > + > + > +//////////////////////////////////////////////////////////////////////// > +// perf reader > + > +PerfReader::PerfReader(perf_event_attr* attr, PerfConsumer* consumer, int > pid) > +{ > + this->page_size = getpagesize(); > + this->page_count = 64; /* XXX May want to verify if this is a large-enough > power-of-2. */ > + this->mmap_size = this->page_size * (this->page_count + 1); // total mmap > size, incl header page > + this->event_wraparound_temp.resize(this->mmap_size); // NB: never resize > this object again! 
> + this->consumer = consumer; > + this->consumer->set_reader(this); > + this->enabled = false; > + > + struct utsname u; > + uname(&u); > + int em = EM_NONE; > + std::string_view machine = u.machine; > + if (machine == "x86_64") em = EM_X86_64; > + else if (machine == "i686" || machine == "i386") em = EM_386; > + else if (machine == "aarch64" || machine == "armv7l") em = EM_ARM; > + else { > + cerr << format("ERROR: Unsupported architecture: {}\n", u.machine); > + exit(1); > + } > + this->default_ebl = ebl_openbackend_machine(em); > + this->sample_regs_user = ebl_perf_frame_regs_mask (this->default_ebl); > + this->sample_regs_count = bitset<64>(this->sample_regs_user).count(); > + > + attr->sample_regs_user = this->sample_regs_user; > + attr->sample_stack_user = 8192; // enough? > + attr->sample_type = (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME); > + attr->sample_type |= PERF_SAMPLE_REGS_USER; > + attr->sample_type |= PERF_SAMPLE_STACK_USER; > + // XXX Maybe: ask for PERF_SAMPLE_CALLCHAIN, in case kernel can > + // unwind for us? Would want an option to control this, to allow > + // eu-stackprof to exercise our own unwinding functionality when > + // testing. > + attr->mmap = 1; > + attr->mmap2 = 1; > + attr->exclude_kernel = 1; /* in-kernel unwinding not relevant for our > usecase */ > + attr->disabled = 1; /* will get enabled soon */ > + attr->task = 1; // catch FORK/EXIT > + attr->comm = 1; // catch EXEC > + attr->comm_exec = 1; // catch EXEC > + // attr->precise_ip = 2; // request 0 skid ... but that conflicts with > PERF_COUNT_HW_BRANCH_INSTRUCTIONS:freq=4000 > + attr->build_id = 1; // request build ids in MMAP2 events > + > + if (pid > 0) // actually only once, to allow break in case of error > + attr->inherit = 1; // propagate to child processes > + > + > + if (show_tmi) > + { // hexdump attr > + clog << "perf_event_attr hexdump: "; > + auto bytes = (unsigned char*) attr; > + for (size_t x = 0; x<sizeof(*attr); x++) > + clog << ((x % 8) ? 
"" : " ") > + << ((x % 32) ? "" : "\n") > + << format("{:02x}", (unsigned)bytes[x]); > + clog << "\n"; > + } > + > + // Iterate over all cpus, even if attaching to a single pid, because > + // we set ->inherit=1. That requires possible concurrency, which is > + // enabled by per-cpu ring buffers. > + int ncpus = sysconf(_SC_NPROCESSORS_CONF); > + for (int cpu=0; cpu<ncpus; cpu++) > + { > + int fd = syscall(__NR_perf_event_open, attr, > + (pid > 0 ? pid : -1), cpu, -1, > + PERF_FLAG_FD_CLOEXEC); > + if (fd < 0) > + { > + cerr << format("WARNING: unable to open perf event for cpu {}: > {}\n", cpu, strerror(errno)); > + continue; > + } > + void *buf = mmap(NULL, this->mmap_size, PROT_READ | PROT_WRITE, > MAP_SHARED, fd, 0); > + if (buf == MAP_FAILED) > + { > + cerr << format("ERROR: perf event mmap failed: {}\n", > strerror(errno)); > + close(fd); > + continue; > + } > + this->perf_fds.push_back(fd); > + this->perf_headers.push_back((perf_event_mmap_page*) buf); > + struct pollfd pfd = {.fd = fd, .events=POLLIN}; > + this->pollfds.push_back(pfd); > + } > + > + if (this->perf_fds.size() == 0) > + throw runtime_error("ERROR: no perf events opened"); > +} > + > +PerfReader::~PerfReader() > +{ > + for (auto fd : this->perf_fds) > + close(fd); > + for (auto m : this->perf_headers) > + munmap((void*) m, this->mmap_size); > + ebl_closebackend (this->default_ebl); > +} > + > +uint64_t millis_monotonic() > +{ > + return > chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now().time_since_epoch()).count(); > +} > + > +static inline uint64_t > +ring_buffer_read_head(volatile struct perf_event_mmap_page *base) > +{ > + uint64_t head = base->data_head; > + asm volatile("" ::: "memory"); // memory fence > + return head; > +} > + > +static inline void > +ring_buffer_write_tail(volatile struct perf_event_mmap_page *base, > + uint64_t tail) > +{ > + asm volatile("" ::: "memory"); // memory fence > + base->data_tail = tail; > +} > + > +void PerfReader::process_some() > 
+{ > + if (! this->enabled) > + { > + for (auto fd : this->perf_fds) > + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0 /* value ignored */); > + this->enabled = true; > + } > + > + uint64_t starttime = millis_monotonic(); > + uint64_t endtime = starttime + 1000; // run at most one second > + uint64_t ring_buffer_size = this->page_size * this->page_count; // just > the ring buffer size > + > + while (! interrupted) > + { > + uint64_t now = millis_monotonic(); > + if (endtime < now) > + break; > + int ready = poll(this->pollfds.data(), this->pollfds.size(), > (int)(endtime-now)); // wait a little while > + if (ready < 0) > + break; > + > + for (size_t i = 0; i < pollfds.size(); i++) > + if (this->pollfds[i].revents & POLLIN) // found an fd with fresh > yummy events > + { > + perf_event_mmap_page *header = perf_headers[i]; > + uint64_t data_head = ring_buffer_read_head(header); > + uint64_t data_tail = header->data_tail; > + uint8_t *base = ((uint8_t *) header) + this->page_size; > + struct perf_event_header *ehdr; > + size_t ehdr_size; > + > + while (data_head != data_tail) // consume all packets in ring > buffer XXX why? > + { > + ehdr = (perf_event_header*) (base + (data_tail & > (ring_buffer_size - 1))); > + ehdr_size = ehdr->size; > + if (show_tmi) > + clog << format("perf head={:p} tail={:p} ehdr={:p} > size={:d}{:x}\n", > + (void*) data_head, (void*) data_tail, > (void*) ehdr, ehdr_size, 0); > + > + if (((uint8_t *)ehdr) + ehdr_size > base + ring_buffer_size) > // mmap region wraparound? 
> + { > + // need to copy it to a contiguous temporary > + uint8_t *copy_start = (uint8_t*) ehdr; > + size_t len_first = base + ring_buffer_size - copy_start; > + size_t len_secnd = ehdr_size - len_first; > + uint8_t *event_temp = this->event_wraparound_temp.data(); > + memcpy(event_temp, copy_start, len_first); // part > at end of mmap'd region > + memcpy(event_temp + len_first, base, len_secnd); // part > at beginning of mmap'd region > + ehdr = (perf_event_header*) event_temp; > + } > + > + this->decode_event(ehdr); > + data_tail += ehdr_size; > + } > + > + ring_buffer_write_tail(header, data_tail); > + } > + } > +} > + > +void PerfReader::decode_event(const perf_event_header* ehdr) > +{ > + consumer->process(ehdr); // allow general processing > + > + // and decode into individual event types > + switch (ehdr->type) > + { > + case PERF_RECORD_SAMPLE: > + { > + const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + > sizeof(perf_event_header); > + uint64_t ip = *reinterpret_cast<const uint64_t*>(data); data += > sizeof(uint64_t); > + uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint64_t time = *reinterpret_cast<const uint64_t*>(data); data += > sizeof(uint64_t); > + // PERF_SAMPLE_CALLCHAIN would be here if requested > + uint64_t abi = *reinterpret_cast<const uint64_t*>(data); data += > sizeof(uint64_t); > + uint32_t nregs = this->sample_regs_count; > + const uint64_t* regs = reinterpret_cast<const uint64_t*>(data); data > += nregs * sizeof(uint64_t); > + uint64_t data_size = *reinterpret_cast<const uint64_t*>(data); data > += sizeof(uint64_t); > + const uint8_t* stack_data = data; > + consumer->process_sample(ehdr, ip, pid, tid, time, abi, nregs, regs, > data_size, stack_data); > + break; > + } > + case PERF_RECORD_COMM: > + { > + const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + > sizeof(perf_event_header); > 
+ uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + const char* comm = reinterpret_cast<const char*>(data); > + consumer->process_comm(ehdr, pid, tid, (ehdr->misc & > PERF_RECORD_MISC_COMM_EXEC), comm); > + break; > + } > + case PERF_RECORD_EXIT: > + { > + const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + > sizeof(perf_event_header); > + uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t ppid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t ptid = *reinterpret_cast<const uint32_t*>(data); > + consumer->process_exit(ehdr, pid, ppid, tid, ptid); > + break; > + } > + case PERF_RECORD_FORK: > + { > + const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + > sizeof(perf_event_header); > + uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t ppid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t ptid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + consumer->process_fork(ehdr, pid, ppid, tid, ptid); > + break; > + } > + case PERF_RECORD_MMAP2: > + { > + const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + > sizeof(perf_event_header); > + uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += > sizeof(uint32_t); > + uint64_t addr = *reinterpret_cast<const uint64_t*>(data); data += > sizeof(uint64_t); > + uint64_t len = *reinterpret_cast<const uint64_t*>(data); data += > sizeof(uint64_t); > + uint64_t pgoff = *reinterpret_cast<const uint64_t*>(data); data += > 
sizeof(uint64_t); > + uint8_t build_id_size = 0; > + const uint8_t* build_id = nullptr; > + if (ehdr->misc & PERF_RECORD_MISC_MMAP_BUILD_ID) > + { > + build_id_size = *reinterpret_cast<const uint8_t*>(data); data += > sizeof(uint8_t); > + data += sizeof(uint8_t) + sizeof(uint16_t); // skip padding > + build_id = reinterpret_cast<const uint8_t*>(data); > + data += build_id_size; > + } > + else > + { > + data += 4 + 4 + 8 + 8; // maj, min, ino, ino_generation > + } > + data += sizeof(uint32_t) + sizeof(uint32_t); // prot, flags > + const char* filename = reinterpret_cast<const char*>(data); > + consumer->process_mmap2(ehdr, pid, tid, addr, len, pgoff, > build_id_size, build_id, filename); > + break; > + } > + default: > + break; > + } > +} > + > + > +//////////////////////////////////////////////////////////////////////// > +// perf event consumers > + > +void StatsPerfConsumer::process_comm(const perf_event_header *sample, > + uint32_t pid, uint32_t tid, bool exec, > const string &comm) > +{ > + if (show_modules) > + { > + clog << format("process_comm: pid={} tid={} exec={} comm={}\n", pid, > tid, exec, comm); > + } > +} > + > +void StatsPerfConsumer::process_exit(const perf_event_header *sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) > +{ > + if (show_modules) > + { > + clog << format("process_exit: pid={} ppid={} tid={} ptid={}\n", pid, > ppid, tid, ptid); > + } > +} > + > +void StatsPerfConsumer::process_fork(const perf_event_header *sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) > +{ > + if (show_modules) > + { > + clog << format("process_fork: pid={} ppid={} tid={} ptid={}\n", pid, > ppid, tid, ptid); > + } > +} > + > +void StatsPerfConsumer::process_sample(const perf_event_header *sample, > + uint64_t ip, > + uint32_t pid, uint32_t tid, > + uint64_t time, > + uint64_t abi, > + uint32_t nregs, const uint64_t *regs, > + uint64_t data_size, const uint8_t > *data) > +{ > + if (show_samples) > + { > + clog << 
format("process_sample: pid={:d} tid={:d} ip={:x} time={:d} > abi={:d} nregs={:d} data_size={:d}\n", > + pid, tid, ip, time, abi, nregs, data_size); > + } > +} > + > +void StatsPerfConsumer::process_mmap2(const perf_event_header *sample, > + uint32_t pid, uint32_t tid, > + uint64_t addr, uint64_t len, uint64_t > pgoff, > + uint8_t build_id_size, const uint8_t > *build_id, > + const char *filename) > +{ > + if (show_modules) > + { > + clog << format("process_mmap2: pid={:d} tid={:d} addr={:x} len={:x} > pgoff={:x} build_id_size={:d} filename={:s}\n", > + pid, tid, addr, len, pgoff, > (unsigned)build_id_size, filename); > + } > +} > + > +StatsPerfConsumer::~StatsPerfConsumer() > +{ > + for (const auto& kv : this->event_type_counts) > + { > + clog << format("event type {} count {}\n", kv.first, kv.second); > + } > +} > + > +void StatsPerfConsumer::process(const perf_event_header* ehdr) > +{ > + this->event_type_counts[ehdr->type] ++; > +} > + > + > +////////////////////////////////////////////////////////////////////// > +// unwind stats table for PerfConsumerUnwinder + downstream consumers > + > +UnwindDwflStats *UnwindStatsTable::pid_find_or_create (pid_t pid) > +{ > + if (this->dwfl_tab.count(pid) == 0) > + this->dwfl_tab.emplace(pid, UnwindDwflStats()); > + return &this->dwfl_tab[pid]; > +} > + > +static const string unknown_comm = "<unknown>"; > + > +string UnwindStatsTable::pid_find_comm (pid_t pid) > +{ > + UnwindDwflStats *entry = this->pid_find_or_create(pid); > + if (entry == NULL) > + return unknown_comm; > + if (!entry->comm.empty()) > + return entry->comm; > + string name = format("/proc/{}/comm", pid); > + ifstream procfile(name); > + string buf; > + if (!procfile || !getline(procfile, buf)) > + entry->comm = unknown_comm; > + else > + entry->comm = buf; > + > + return entry->comm; > +} > + > +Dwfl *UnwindStatsTable::pid_find_dwfl (pid_t pid) > +{ > + if (this->dwfl_tab.count(pid) == 0) > + return NULL; > + return this->dwfl_tab[pid].dwfl; > +} > + > 
+void UnwindStatsTable::pid_store_dwfl (pid_t pid, Dwfl *dwfl) > +{ > + UnwindDwflStats *entry = this->pid_find_or_create(pid); > + if (entry == NULL) > + return; > + entry->dwfl = dwfl; > + if (show_summary) > + this->pid_find_comm(pid); > + return; > +} > + > +UnwindModuleStats *UnwindStatsTable::buildid_find (string buildid) > +{ > + if (this->buildid_tab.count(buildid) == 0) > + return NULL; > + return &this->buildid_tab[buildid]; > +} > + > +UnwindModuleStats *UnwindStatsTable::buildid_find_or_create (string buildid, > Dwfl_Module *mod) > +{ > + if (this->buildid_tab.count(buildid) == 0) > + { > + this->buildid_tab.emplace(buildid, UnwindModuleStats()); > + /* TODO: Guess text range for mod? */ > + (void)mod; > + } > + return &this->buildid_tab[buildid]; > +} > + > +void UnwindStatsTable::print_summary () const > +{ > +#define PERCENT(x,tot) ((x+tot == 0)?0.0:((double)x)/((double)tot)*100.0) > + int total_samples = 0; > + int total_lost_samples = 0; > + clog << "\n=== pid / sample counts ===\n"; > + for (auto& p : this->dwfl_tab) > + { > + pid_t pid = p.first; > + const UnwindDwflStats& d = p.second; > + clog << format(N_("{} {} -- max {} frames, received {} samples, lost > {} samples ({:.1f}%) (last {}, worst {})\n"), > + pid, d.comm, d.max_frames, > + d.total_samples, d.lost_samples, > + PERCENT(d.lost_samples, d.total_samples), > + dwfl_unwound_source_str(d.last_unwound), > + dwfl_unwound_source_str(d.worst_unwound)); > + total_samples += d.total_samples; > + total_lost_samples += d.lost_samples; > + } > + clog << "===\n"; > + clog << format(N_("TOTAL -- received {} samples, lost {} samples, loaded > {} processes\n"), > + total_samples, total_lost_samples, > + this->dwfl_tab.size() /* TODO: If implementing eviction, need to > maintain a separate count of evicted pids. 
*/); > + clog << "\n"; > +#undef PERCENT > +} > + > +//////////////////////////////////////////////////////////////////////// > +// real perf consumer: unwind helpers > + > +PerfConsumerUnwinder::PerfConsumerUnwinder(UnwindSampleConsumer* usc, > UnwindStatsTable *ust) > + : consumer(usc), stats(ust) { > + maxframes = usc->maxframes(); > + this->tracker = dwflst_tracker_begin (&dwfl_cfi_callbacks); > +} > + > +PerfConsumerUnwinder::PerfConsumerUnwinder(UnwindSampleConsumer* usc, > UnwindStatsTable *ust, PerfReader *reader) > + : consumer(usc), stats(ust) { > + maxframes = usc->maxframes(); > + this->reader = reader; > + this->tracker = dwflst_tracker_begin (&dwfl_cfi_callbacks); > +} > + > +PerfConsumerUnwinder::~PerfConsumerUnwinder() { > + dwflst_tracker_end (this->tracker); > +} > + > +/* TODO: Could be relocated to libdwfl/linux-pid-attach.c > + to remove some duplication of existing linux-pid-attach code. */ > +int PerfConsumerUnwinder::find_procfile (Dwfl *dwfl, pid_t *pid, Elf **elf, > int *elf_fd) > +{ > + int err = 0; /* The errno to return. XXX libdwfl would also set this for > dwfl->attacherr. */ > + > + /* Make sure to report the actual PID (thread group leader) to > + dwfl_attach_state. 
*/ > + string buffer = format("/proc/{}/status", *pid); > + ifstream procfile(buffer); > + if (!procfile) > + { > + err = errno; > + fail: > + return err; > + } > + > + string line; > + while (getline (procfile, line)) > + if (startswith (line.c_str(), "Tgid:")) > + { > + errno = 0; > + char *endptr; > + long val = strtol (&line.c_str()[5], &endptr, 10); > + if ((errno == ERANGE && val == LONG_MAX) > + || *endptr != '\n' || val < 0 || val != (pid_t) val) > + *pid = 0; > + else > + *pid = (pid_t) val; > + break; > + } > + > + if (*pid == 0) > + { > + err = ESRCH; > + goto fail; > + } > + > + { > + string name = format("/proc/{}/task", *pid); > + DIR *dir = opendir (name.c_str()); > + if (dir == NULL) > + { > + err = errno; > + goto fail; > + } > + else > + closedir(dir); > + } > + > + { > + string name = format("/proc/{}/exe", *pid); > + *elf_fd = open (name.c_str(), O_RDONLY); > + } > + if (*elf_fd >= 0) > + { > + *elf = elf_begin (*elf_fd, ELF_C_READ_MMAP, NULL); > + if (*elf == NULL) > + { > + /* Just ignore, dwfl_attach_state will fall back to trying > + to associate the Dwfl with one of the existing Dwfl_Module > + ELF images (to know the machine/class backend to use). 
*/ > + if (verbose) > + cerr << format(N_("WARNING: find_procfile pid {}: elf not > found\n"), (long long)*pid); > + close (*elf_fd); > + *elf_fd = -1; > + } > + } > + else > + *elf = NULL; > + return 0; > +} > + > +Dwfl *PerfConsumerUnwinder::init_dwfl(pid_t pid) > +{ > + Dwfl *dwfl = dwflst_tracker_dwfl_begin (this->tracker); > + > + int err = dwfl_linux_proc_report (dwfl, pid); > + if (err < 0) > + { > + if (verbose) > + cerr << format("WARNING: dwfl_linux_proc_report pid {}: {}\n", (long > long) pid, dwfl_errmsg(-1)); > + return NULL; > + } > + err = dwfl_report_end (dwfl, NULL, NULL); > + if (err != 0) > + { > + if (verbose) > + cerr << format("WARNING: dwfl_report_end pid {}: {}\n", (long long) > pid, dwfl_errmsg(-1)); > + return NULL; > + } > + > + return dwfl; > +} > + > +Dwfl *pcu_init_dwfl_cb (Dwflst_Process_Tracker *cb_tracker __attribute__ > ((unused)), > + pid_t pid, > + void *arg) > +{ > + PerfConsumerUnwinder *pcu = (PerfConsumerUnwinder *)arg; > + return pcu->init_dwfl (pid); > +} > + > +uint32_t expected_frame_nregs (Ebl *ebl) > +{ > + int m = ebl_get_elfmachine(ebl); > + /* For aarch64, we actually use fewer than ebl->frame_nregs to unwind. */ > + if (m == EM_ARM) > + return 14; /* XXX 16 for 32-bit ARM */ > + /* On x86, expect everything except FLAGS: */ > + if (m == EM_X86_64 || m == EM_386) > + return ebl_frame_nregs(ebl); > + /* In general, it's better to be on the permissive side. 
*/ > + return 1; > +} > + > +Dwfl *PerfConsumerUnwinder::find_dwfl(pid_t pid, const uint64_t *regs, > uint32_t nregs, > + Elf **out_elf, bool *cached) > +{ > + if (nregs < expected_frame_nregs(this->reader->ebl())) > + { > + if (verbose) > + cerr << format(N_("WARNING: find_dwfl: nregs={}, expected at least > {}\n"), nregs, ebl_frame_nregs(this->reader->ebl())); > + return NULL; > + } > + > + Elf *elf = NULL; > + Dwfl *dwfl = dwflst_tracker_find_pid (this->tracker, pid, > pcu_init_dwfl_cb, this); > + int elf_fd = -1; > + int err; > + if (dwfl != NULL && dwfl_pid(dwfl) != -1 /* dwfl is attached */) > + { > + *cached = true; > + goto reuse; > + } > + err = this->find_procfile (dwfl, &pid, &elf, &elf_fd); > + if (err < 0) find_procfile returns 0 on success and a positive errno on failure, so `err < 0` is never true. This error handler never runs, and on failure execution falls through to `reuse:` with elf/elf_fd still NULL/-1. The check should be `err != 0`, and the warning should print strerror(err) rather than dwfl_errmsg(-1), since the failure is an errno and not a libdwfl error. > + { > + if (verbose) > + cerr << format("WARNING: find_procfile pid {}: {}\n", (long long) > pid, dwfl_errmsg(-1)); > + return NULL; > + } > + > + reuse: > + this->last_us.sp = regs[this->get_sp_reg(this->last_us.elfclass == > ELFCLASS32)]; > + this->last_us.base = this->last_us.sp; > + > + if (!*cached) > + this->stats->pid_store_dwfl (pid, dwfl); > + *out_elf = elf; > + return dwfl; > +} > + > +/* Index of stack pointer within dwarf_regs order: */ > +int PerfConsumerUnwinder::get_sp_reg(bool is_abi32) > +{ > + int machine = ebl_get_elfmachine(this->reader->ebl()); > + if (machine == EM_X86_64 || machine == EM_386) return is_abi32 ? 4 : 7; > + else if (machine == EM_ARM) return is_abi32 ? 13 : 31; > + else { assert(0); return 7; } > +} > + > +int PerfConsumerUnwinder::unwind_frame_cb(Dwfl_Frame *state) > +{ > + Dwarf_Addr pc; > + bool isactivation; > + if (! dwfl_frame_pc (state, &pc, &isactivation)) > + { > + if (verbose) > + cerr << format("WARNING: dwfl_frame_pc: {}\n", dwfl_errmsg(-1)); > + return DWARF_CB_ABORT; > + } > + > + Dwarf_Addr pc_adjusted = pc - (isactivation ? 
0 : 1); > + Dwarf_Addr sp; > + > + int is_abi32 = (this->last_us.elfclass == ELFCLASS32); > + int user_regs_sp = this->get_sp_reg(is_abi32); > + int rc = dwfl_frame_reg (state, user_regs_sp, &sp); > + if (rc < 0) > + { > + if (verbose) > + cerr << format("WARNING: dwfl_frame_reg: {}\n", dwfl_errmsg(-1)); > + return DWARF_CB_ABORT; > + } > + > + UnwindDwflStats *dwfl_ent = > this->stats->pid_find_or_create(this->last_us.pid); > + if (dwfl_ent != NULL) > + { > + Dwfl_Unwound_Source unwound_source = dwfl_frame_unwound_source(state); > + if (unwound_source > dwfl_ent->worst_unwound) > + dwfl_ent->worst_unwound = unwound_source; > + dwfl_ent->last_unwound = unwound_source; > + if (show_frames) > + { > + Dwfl_Module *m = dwfl_addrmodule(this->last_us.dwfl, pc); > + uint64_t rel_pc = pc_adjusted; > + int j = dwfl_module_relocate_address (m, &rel_pc); > + (void) j; > + clog << format("* frame {:d}: rel_pc={:x} raw_pc={:x} sp={:x}+{:x} > [{}]\n", > + this->last_us.addrs.size(), rel_pc, pc_adjusted, > this->last_us.base, (sp - this->last_us.base), > dwfl_unwound_source_str(unwound_source)); > + } > + } > + else > + { > + if (show_frames) > + { > + Dwfl_Module *m = dwfl_addrmodule(this->last_us.dwfl, pc); > + uint64_t rel_pc = pc_adjusted; > + int j = dwfl_module_relocate_address (m, &rel_pc); > + (void) j; > + clog << format(N_("* frame {:d}: rel_pc={:x} raw_pc={:x} > sp={:x}+{:x} [dwfl_ent not found]\n"), > + this->last_us.addrs.size(), rel_pc, pc_adjusted, > this->last_us.base, (sp - this->last_us.base)); > + } > + } > + if (show_debugfile) > + { > + Dwfl_Module *m = dwfl_addrmodule(this->last_us.dwfl, pc); > + if (m == NULL) > + { > + clog << format("* pid {:d} pc={:x} -> MODULE NOT FOUND\n", > + this->last_us.pid, pc); > + } > + else > + { > + const unsigned char *desc; > + GElf_Addr vaddr; > + int build_id_len = dwfl_module_build_id (m, &desc, &vaddr); > + clog << format("* pid {:d} build_id=", this->last_us.pid); > + for (int i = 0; i < build_id_len; ++i) > + clog << 
format("{:02x}", static_cast<int>(desc[i])); > + > + const char *mainfile; > + const char *debugfile; > + const char *modname = dwfl_module_info (m, NULL, NULL, NULL, NULL, > + NULL, &mainfile, > &debugfile); > + clog << format("module={} mainfile={} debugfile={}\n", > + modname, > + mainfile ? mainfile : "<none>", > + debugfile ? debugfile : "<none>"); > + /* TODO: Also store this data to avoid repeated extraction for > + the final buildid summary? */ > +#ifdef DEBUG_MODULES > + Dwarf_Addr bias; > + Dwarf_CFI *cfi_eh = dwfl_module_eh_cfi (m, &bias); > + if (cfi_eh == NULL) > + clog << format("* pc={:x} -> NO EH_CFI\n", pc); > +#endif > + } > + } > + > + this->last_us.sp = sp; > + this->last_us.addrs.push_back(pc); > + > + /* e.g. gmon callgraphs only requires maxframes=1 > + (initial pc + one frame for caller ID only) */ > + if (this->last_us.addrs.size() > this->maxframes) > + { > + /* XXX without maxframes, very rarely, the unwinder can loop > + infinitely; worth investigating? */ > + return DWARF_CB_ABORT; > + } > + return DWARF_CB_OK; > +} > + > +int pcu_unwind_frame_cb(Dwfl_Frame *state, void *arg) > +{ > + PerfConsumerUnwinder *pcu = (PerfConsumerUnwinder *)arg; > + return pcu->unwind_frame_cb(state); > +} > + > + > +//////////////////////////////////////////////////////////////////////// > +// real perf consumer: event handler callbacks > + > +void PerfConsumerUnwinder::process_comm(const perf_event_header *sample, > + uint32_t pid, uint32_t tid, bool > exec, const string &comm) > +{ > + // NB: Could have dwflst ditch data for process and start anew, if EXEC. > + // XXX: is this needed to avoid gradual memory leaks or pid reuse? > +} > + > +void PerfConsumerUnwinder::process_exit(const perf_event_header *sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) > +{ > + // NB: Could have dwflst ditch data for process. > + // XXX: is this needed to avoid gradual memory leaks or pid reuse? 
> +} > + > +void PerfConsumerUnwinder::process_fork(const perf_event_header *sample, > + uint32_t pid, uint32_t ppid, > + uint32_t tid, uint32_t ptid) > +{ > + // NB: Could have dwflst begin tracking a new process, but > + // this will likely happen automatically when a packet is received > + // from it. The short duration between fork/exec typically means > + // elfutils will pick up on the post-exec process -- we would have > + // to work hard to replicate a situation where > + // process_fork/process_comm handling are needed. > +} > + > +void PerfConsumerUnwinder::process_sample(const perf_event_header *sample, > + uint64_t ip, > + uint32_t pid, uint32_t tid, > + uint64_t time, > + uint64_t abi, > + uint32_t nregs, const uint64_t > *regs, > + uint64_t data_size, const uint8_t > *data) > +{ > + string comm; > + if (show_summary) > + comm = this->stats->pid_find_comm(pid); > + > + if (show_frames) > + clog << "\n"; /* extra newline for padding */ > + > + Elf *elf = NULL; // Released during dwflst_tracker_end > + bool cached = false; > + Dwfl *dwfl = this->find_dwfl (pid, regs, nregs, &elf, &cached); > + UnwindDwflStats *dwfl_ent = NULL; > + bool first_load = false; /* -> for show_modules: pid is loaded first time > */ > + if (verbose || show_summary || show_modules) > + { > + if (dwfl_ent == NULL) > + dwfl_ent = this->stats->pid_find_or_create(pid); > + if (dwfl_ent->total_samples == 0) > + first_load = true; > + } > + if (dwfl == NULL) > + { > + if (show_summary || show_modules) > + { > + /* dwfl_ent loaded above */ > + dwfl_ent->total_samples++; > + dwfl_ent->lost_samples++; > + } > + if (verbose && show_summary) > + { > + cerr << format("WARNING: find_dwfl pid {} ({}) (failed)\n", (long > long)pid, comm); > + } > + else > + { > + cerr << format("WARNING: find_dwfl pid {} (failed)\n", (long > long)pid); > + } > + return; > + } > + > + if (show_samples || (first_load && show_modules)) > + { > + bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32); > + clog << 
format("find_dwfl {}pid {:d} {}({}): hdr_size={:d} size={:d}{} > pc={:x} sp={:x}+{:d}\n", > + first_load ? "newly seen " : "", (long long)pid, > + (cached ? "(cached) " : ""), comm, > + sample->size, data_size, > + (is_abi32 ? " (32-bit)" : ""), ip, > + this->last_us.base, 0); > + } > + > + this->last_us.addrs.clear(); > + this->last_us.elfclass = (abi == PERF_SAMPLE_REGS_ABI_32 ? ELFCLASS32 : > ELFCLASS64); > + this->last_us.dwfl = dwfl; > + this->last_us.pid = pid; > + int rc = dwflst_perf_sample_getframes (dwfl, elf, pid, tid, > + data, data_size, > + regs, nregs, > + this->reader->regs_mask(), abi, > + pcu_unwind_frame_cb, this); > + if (rc < 0) > + { > + /* dwfl_ent loaded above */ > + if (verbose && dwfl_ent->shown_errors < 10) > + { > + dwfl_ent->shown_errors ++; > + cerr << format("WARNING: dwflst_perf_sample_getframes pid {}: > {}{}\n", > + (long long)pid, dwfl_errmsg(-1), > + dwfl_ent->shown_errors >= 10 ? > + " (...suppressing further warnings for this pid)" : > ""); > + } > + } > + if (show_summary) > + { > + /* For final diagnostics. 
dwfl_ent loaded above */ > + if (this->last_us.addrs.size() > (unsigned long)dwfl_ent->max_frames) > + dwfl_ent->max_frames = this->last_us.addrs.size(); > + dwfl_ent->total_samples++; > + if (this->maxframes > 2 && this->last_us.addrs.size() <= 2) > + dwfl_ent->lost_samples++; > + } > + > + this->consumer->process (&this->last_us); > + return; > +} > + > +void PerfConsumerUnwinder::process_mmap2(const perf_event_header *sample, > + uint32_t pid, uint32_t tid, > + uint64_t addr, uint64_t len, > uint64_t pgoff, > + uint8_t build_id_size, const uint8_t > *build_id, > + const char *filename) > +{ > + Dwfl *dwfl = this->stats->pid_find_dwfl(pid); > + if (dwfl != NULL) > + { > + dwfl_report_begin_add(dwfl); > + dwfl_report_module(dwfl, filename, /*start*/ addr, /*end*/ addr + len); > + dwfl_report_end(dwfl, NULL, NULL); > + } > +} > + > + > +//////////////////////////////////////////////////////////////////////// > +// unwind data consumers // basic statistics > + > +UnwindStatsConsumer::~UnwindStatsConsumer() > +{ > + this->stats->print_summary(); > +} > + > +void UnwindStatsConsumer::process(const UnwindSample* sample) > +{ > + /* Most of the logic is handled by UnwindStatsTable. */ > +} > + > +int UnwindStatsConsumer::maxframes() > +{ > + return opt_maxframes >= 0 ? 
opt_maxframes : 256; > +} > + > + > +//////////////////////////////////////////////////////////////////////// > +// unwind data consumers // gprof > + > +/* gmon.out file format bits */ > +#define GMON_MAGIC "gmon" > +#define GMON_VERSION 1 > + > +struct gmon_hdr { > + char cookie[4]; > + char version[4]; > + char spare[3 * 4]; > +}; > + > +enum gmon_entry_tag { > + GMON_TAG_TIME_HIST = 0, > + GMON_TAG_CG_ARC = 1, > + GMON_TAG_BB_COUNT = 2, > +}; > + > +struct gmon_hist_hdr { > + uint8_t tag; /* GMON_TAG_TIME_HIST */ > + uint8_t unused[3]; > + uint64_t low_pc; > + uint64_t high_pc; > + uint32_t num_buckets; > + uint32_t prof_rate; > + char _dimension_string[16]; > +}; > + > + > +void GprofUnwindSampleConsumer::record_gmon_hist(ostream &of, map<uint64_t, > uint32_t> &histogram, uint64_t low_pc, uint64_t high_pc, uint64_t alignment) > +{ > + // write one histogram from low_pc ... high_pc > + uint32_t num_buckets = (high_pc-low_pc)/alignment + 1; > + double result_scale = > (double)((high_pc-low_pc)/sizeof(uint16_t))/num_buckets; > + if (verbose > 5) > + /* It's the @scale value that must be kept within 0.000001 of 0.5 to > + keep gprof from complaining. */ > + clog << format("+histogram {:x}..{:x} (alignment {}) of {} buckets > @scale {}\n", > + low_pc, high_pc, alignment, num_buckets, result_scale); > + > + // write histogram record header > + unsigned char tag = GMON_TAG_TIME_HIST; > + of.write(reinterpret_cast<const char *>(&tag), sizeof(tag)); > + int wordsize = (sizeof (void *) == 8) ? 
8 : 4; > + if (wordsize == 4) { > + uint32_t addr = low_pc; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + addr = high_pc; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + } else { > + of.write(reinterpret_cast<const char *>(&low_pc), sizeof(low_pc)); > + of.write(reinterpret_cast<const char *>(&high_pc), sizeof(high_pc)); > + } > + of.write(reinterpret_cast<const char *>(&num_buckets), > sizeof(num_buckets)); > + uint32_t prof_rate = attr.sample_freq; > + of.write(reinterpret_cast<const char *>(&prof_rate), sizeof(prof_rate)); > + // dimension string is 15 chars long (not null terminated) > + std::string dimension_base = libpfm_event.empty() ? "ticks" : > + libpfm_event.substr(0, 15); > + dimension_base.resize(15, '\0'); // ensure exactly 15 bytes > + of.write(dimension_base.data(), 15); > + // dimension character abbreviation: just take the first char of above > + of.write(dimension_base.data(), 1); > + > + // write histogram buckets > + uint64_t bucket_addr = low_pc; > + int n_overflows = 0, max_overflows = 5; // limit 'bucket overflow' spam > + for (uint32_t bucket = 0; bucket < num_buckets; bucket++) > + { > + uint16_t count = 0; > + for (auto it = histogram.lower_bound(bucket_addr); > + it != histogram.upper_bound(bucket_addr+alignment-1); > + it ++) > + { > + if (numeric_limits<uint16_t>::max() <= (int) count + (int) > it->second) > + { > + count = numeric_limits<uint16_t>::max(); > + // XXX: a provisional error message to give a sense of > + // whether this happens often-enough to do something > + // more complex, such as adjusting the histogram > + // granularity: > + if (n_overflows >= max_overflows) break; > + n_overflows++; > + cerr << format("WARNING: histogram bucket overflow at {:x}{}", > + bucket_addr, > + n_overflows >= max_overflows ? > + " (... 
suppressing further warnings for this > histogram)" : "") > + << endl; > + break; > + } > + count += it->second; > + } > + bucket_addr += alignment; > + of.write(reinterpret_cast<const char *>(&count), sizeof(count)); > + } > +} > + > +void GprofUnwindSampleConsumer::record_gmon_out(const string& buildid, > UnwindModuleStats& m) > +{ > + string filename = output_dir + "/" + "gmon." + buildid + ".out"; > + string exe_symlink_path = output_dir + "/" + "gmon." + buildid + ".exe"; > + string json_path = output_dir + "/" + "gmon." + buildid + ".json"; > + > + if (output_force) { > + filesystem::remove(filename); > + filesystem::remove(exe_symlink_path); > + filesystem::remove(json_path); > + } > + > + string target_path = buildid_to_mainfile[buildid]; > + if (target_path != unknown_comm) // skip .exe symlink if there's no path > + if (symlink(target_path.c_str(), exe_symlink_path.c_str()) == -1) { > + // Handle error, e.g., print errno or throw exception > + cerr << format("WARNING: symlink failed: {}\n", strerror(errno)); > + // NB: no return needed here; proceed to write out other bits. > + // A smart enough consumer will make do with buildid based executable > lookup. 
> + } > + > + json_object *metadata = json_object_new_object(); > + if (!metadata) { > + json_fail: > + cerr << format("ERROR: json allocation failed: {}\n", strerror(errno)); > + return; > + } > + json_object *buildid_js = json_object_new_string(buildid.c_str()); > + if (NULL == buildid_js) goto json_fail; > + json_object_object_add(metadata, "buildid", buildid_js); > + if (buildid_to_mainfile.count(buildid) != 0) { > + const string &mainfile = buildid_to_mainfile[buildid]; > + json_object *mainfile_js = json_object_new_string(mainfile.c_str()); > + if (NULL == mainfile_js) goto json_fail; > + json_object_object_add(metadata, "mainfile", mainfile_js); > + } > + if (buildid_to_debugfile.count(buildid) != 0) { > + const string &debugfile = buildid_to_debugfile[buildid]; > + json_object *debugfile_js = json_object_new_string(debugfile.c_str()); > + if (NULL == debugfile_js) goto json_fail; > + json_object_object_add(metadata, "debugfile", debugfile_js); > + } > + if (libpfm_event != "") { > + json_object *event_js = json_object_new_string(libpfm_event.c_str()); > + if (NULL == event_js) goto json_fail; > + json_object_object_add(metadata, "libpfm-event", event_js); > + } > + if (libpfm_event_decoded != "") { > + json_object *event_js = > json_object_new_string(libpfm_event_decoded.c_str()); > + if (NULL == event_js) goto json_fail; > + json_object_object_add(metadata, "libpfm-event-decoded", event_js); > + } > + { > + json_object *br_js = json_object_new_boolean(branch_record); > + if (NULL == br_js) goto json_fail; > + json_object_object_add(metadata, "branch-record", br_js); > + } > + > + const char *metadata_str = json_object_to_json_string(metadata); > + if (!metadata_str) goto json_fail; > + ofstream of_js (json_path); > + of_js << metadata_str; > + of_js.close(); > + json_object_put (metadata); > + > + ofstream of (filename, ios::binary); > + if (!of) > + { > + cerr << format(N_("ERROR: buildid {} -- could not open '{}' for > writing\n"), buildid, filename); > 
+ } > + > + /* Write gmon header. It and other headers mostly hold > + native-endian and fixed (or native) bitwidth values. In > + principle, we should get the bitness/endianness from the > + particular executable associated with the buildid. But, being a > + live profiler, we don't really have to deal with CROSS > + architecture work, and for now can just hard-code the bitness to > + match this host program. XXX > + */ > + int wordsize = (sizeof (void *) == 8) ? 8 : 4; > + struct gmon_hdr ghdr; > + memcpy (&ghdr.cookie[0], GMON_MAGIC, 4); > + uint32_t version = GMON_VERSION; > + memcpy (&ghdr.version[0], reinterpret_cast<const char *>(&version), 4); > + memset (&ghdr.spare[0], 0, sizeof(ghdr.spare)); > + of.write(reinterpret_cast<const char *>(&ghdr), sizeof(ghdr)); > + > + if (m.histogram.size() > 0) > + { > + uint64_t low_pc = m.histogram.begin()->first; > + uint64_t high_pc = m.histogram.rbegin()->first; > + uint64_t alignment = (high_pc - low_pc + 1) / UINT_MAX + 1; > + > + if (gmon_hist_split == HIST_SPLIT_NONE) > + { > + /* Put everything into one histogram. */ > + this->record_gmon_hist(of, m.histogram, low_pc, high_pc, alignment); > + } > + else if (gmon_hist_split == HIST_SPLIT_EVEN) > + { > + /* This option attempts to satisfy gprof's histogram scale > + consistency check, which requires all values > + '(double)(high_pc-low_pc)/num_buckets' to fall within > + EPSILON. In practice, we can only be sure of this if we > + cover the address space with histograms all one size. */ > + > + /* Keep the search for 'optimal' size simple -- we just need > + a plausible order of magnitude. XXX Some rechecking of > + correctness needed. 
*/ > + //uint64_t min_size = 1; // this is 'optimal' much of the time > + uint64_t min_size = 1024; > + uint64_t max_size = high_pc - low_pc; > + uint64_t opt_size = min_size; > + uint64_t opt_est = 0; > + uint64_t next_size = opt_size; > + while (next_size < max_size) > + { > + if (next_size > max_size) `next_size > max_size` can never be true here: the enclosing `while (next_size < max_size)` condition has just been checked and next_size is not modified in between, so this clamp is dead code. > + next_size = max_size; > + uint64_t size_inc = sizeof(struct gmon_hdr) + next_size; > + uint64_t size_est = size_inc; > + uint64_t pc = low_pc; > + while (pc + size_est < high_pc) > + { > + auto it = m.histogram.upper_bound(pc + size_est/alignment); > + if (it == m.histogram.end()) > + break; > + pc = it->first; > + size_est += sizeof(struct gmon_hdr) + next_size; > + } > + if (opt_est == 0 || size_est < opt_est) > + { > + opt_size = next_size; > + opt_est = size_est; > + } > + // if (opt_est > prev_est) break; /* XXX: We've hit the lowest > point. */ > + next_size = 2 * next_size; > + } > + > + /* Partition into histograms of opt_size. > + XXX: May need to check if low_pc must be aligned. */ > + uint64_t prev_pc = low_pc; > + uint64_t pc = prev_pc; > + for (const auto& p : m.histogram) > + { > + pc = p.first; > + if (pc - low_pc > opt_size) > + { > + /* Record a histogram from low_pc to low_pc+opt_size. */ > + this->record_gmon_hist(of, m.histogram, > + low_pc, low_pc+opt_size-1 /* >= > prev_pc */, > + alignment); > + low_pc = pc; > + } > + prev_pc = pc; > + } > + /* Record a final histogram from low_pc to low_pc+opt_size. > + XXX: Edge case -- may want to adjust for overflow of > + low_pc+opt_size at end of address space. */ > + this->record_gmon_hist(of, m.histogram, > + low_pc, low_pc+opt_size-1 /* >= prev_pc */, > + alignment); > + } > + else if (gmon_hist_split == HIST_SPLIT_FLEX) > + { > + /* Allow variable-size histograms to save on storage space. > + Will fail gprof's input consistency checks, XXX but ok > + for profiledb purposes? 
*/ > + uint64_t prev_pc = low_pc; > + uint64_t pc = prev_pc; > + /* XXX Iterate histogram ascending by key, faster than by addr > + when we just need to scan for gaps. */ > + for (const auto& p : m.histogram) > + { > + pc = p.first; > + uint64_t bin_dist = (pc - prev_pc) / alignment; > + if (bin_dist > sizeof(struct gmon_hist_hdr)) > + /* XXX If we add '&& low_pc != prev_pc && pc != high_pc', > + this avoids producing a histogram with only 1 entry, > + but this is still not enough to satisfy gprof's > + histogram scale calculation. */ > + { > + /* Record a histogram from low_pc to prev_pc. */ > + this->record_gmon_hist(of, m.histogram, low_pc, prev_pc, > alignment); > + low_pc = pc; > + } > + prev_pc = pc; > + } > + /* Record a final histogram from low_pc to pc. */ > + this->record_gmon_hist(of, m.histogram, low_pc, pc, alignment); > + } > + } > + > + /* Write call graph arcs. */ > + for (auto& p : m.callgraph) > + { > + unsigned char tag = GMON_TAG_CG_ARC; > + of.write(reinterpret_cast<const char *>(&tag), sizeof(tag)); > + if (wordsize == 4) { > + uint32_t addr = p.first.first; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + addr = p.first.second; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + } else { > + uint64_t addr = p.first.first; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + addr = p.first.second; > + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr)); > + } > + /* p is (from,to) -> count */ > + uint32_t count = p.second; > + of.write(reinterpret_cast<const char *>(&count), sizeof(count)); > + } > + > + of.close(); > +} > + > +GprofUnwindSampleConsumer::~GprofUnwindSampleConsumer() > +{ > + if (show_summary) > + { > + this->stats->print_summary (); > + clog << "=== buildid / sample counts ===\n"; > + } > + > + UnwindStatsTable::buildid_map_t sorted_map > (this->stats->buildid_tab.begin(), this->stats->buildid_tab.end()); > + for (auto& p : sorted_map) // traverse in sorted 
order > + { > + const string& buildid = p.first; > + UnwindModuleStats& module_stats = p.second; > + this->record_gmon_out(buildid, module_stats); > + if (show_summary) > + { > + /* In record_gmon_out we will write the buildid-->path mapping > + to a json metadata file. That makes for a reasonable hint; > + debuginfod-find can be used as a mostly-functional fallback > + (for packaged rather than locally built executables) if the > + results are moved to another system. */ > + string mainfile = "<unknown>"; > + if (buildid_to_mainfile.count(buildid) != 0) > + mainfile = buildid_to_mainfile[buildid]; > + string debugfile = ""; > + if (buildid_to_debugfile.count(buildid) != 0) > + debugfile = buildid_to_debugfile[buildid]; > + clog << format(N_("buildid {} ({}{}{}) -- received {} distinct > pcs, {} callgraph arcs\n"), /* TODO also count samples / estimated histogram > size? */ > + buildid, > + mainfile, > + debugfile.empty() ? "" : " +debugfile ", > + debugfile, > + module_stats.histogram.size(), > + module_stats.callgraph.size()); > + } > + } > + if (show_summary) > + { > + clog << "===\n"; > + clog << format(N_("TOTAL -- received {} buildids\n"), > this->stats->buildid_tab.size()); > + } > + clog << "\n"; > +} > + > + > +int > +GprofUnwindSampleConsumer::maxframes() > +{ > + // gprof only needs one level of backtracing, > + // but user can override consumer's preference > + // with --maxframes option: > + return opt_maxframes >= 0 ? opt_maxframes : 1; > +} > + > + > +void GprofUnwindSampleConsumer::process(const UnwindSample *sample) > +{ > + if (sample->addrs.size() < 1) > + return; /* edge case -- no pc or callgraph arc */ > + > + Dwarf_Addr pc = sample->addrs[0]; > + Dwarf_Addr pc2 = sample->addrs.size() < 2 ? 
0 : sample->addrs[1]; > + > + Dwfl_Module *mod = dwfl_addrmodule(sample->dwfl, pc); > + if (mod == NULL) > + return; > +#if 0 > + Dwarf_Addr bias; > + Elf *elf = dwfl_module_getelf (mod, &bias); > + (void)elf; > +#endif Same comment as for the previous #if 0 block: the disabled code only fetches the module's Elf handle and then discards it via `(void)elf`. If this is not intended for debug builds going forward, let's get rid of it. Otherwise, let's use a macro defined near the top of the file to control whether it's enabled. Both of the existing #if 0 blocks could be controlled with one macro if appropriate. Aaron > + > + Dwfl_Module *mod2 = dwfl_addrmodule(sample->dwfl, pc2); > + // XXX: allowing mod2 == NULL -- callgraph arc will be skipped > + > + // extract buildid for pc (hit callee) > + const unsigned char *desc = nullptr; > + GElf_Addr vaddr; > + int build_id_len = dwfl_module_build_id(mod, &desc, &vaddr); > + if (build_id_len <= 0) > + return; // TODO: report/tabulate hit outside known modules > + > + // possible optimization would be to use the unconverted build_id_desc as > hash key > + string buildid; > + for (int i = 0; i < build_id_len; ++i) { > + buildid += format("{:02x}", static_cast<int>(desc[i])); > + } > + > + const char *mainfile_cstr; > + const char *debugfile_cstr; > + Dwarf_Addr low_addr; > + Dwarf_Addr high_addr; > + dwfl_module_info (mod, NULL, &low_addr, &high_addr, NULL, > + NULL, &mainfile_cstr, &debugfile_cstr); > + string mainfile = mainfile_cstr ? mainfile_cstr : "<unknown>"; > + string debugfile = debugfile_cstr ? debugfile_cstr : ""; > + if (!buildid_to_mainfile.count(buildid)) > + buildid_to_mainfile[buildid] = mainfile; > + if (!buildid_to_debugfile.count(buildid)) > + buildid_to_debugfile[buildid] = debugfile; > + /* XXX: Also monitor for collisions here? 
*/ > + > + UnwindModuleStats *buildid_ent = > this->stats->buildid_find_or_create(buildid, mod); > + > + uint64_t last_pc = pc; > + int i = dwfl_module_relocate_address (mod, &pc); > + /* XXX: Out-of-range address seen with ld-linux.so, not useful for > profiledb purposes: */ > + if ((last_pc < low_addr || last_pc > high_addr)) > + { > + if (verbose) > + clog << format(N_("{}: Skipping pc={:x} raw_pc={:x} outside module > range start={:x}..end={:x}\n"), > + mainfile, pc, last_pc, low_addr, high_addr); > + return; > + } > + (void) i; > + // XXX: could get dwfl_module_relocation_info (mod, i, NULL), but no need? > + buildid_ent->record_pc(pc); > + > + // If caller & callee are in different modules, this is a > cross-shared-library > + // call, so we can't track it as a call-graph arc. TODO: at least count > them > + if (sample->addrs.size() >= 2 && mod == mod2) // intra-module call > + { > + last_pc = pc2; > + int j = dwfl_module_relocate_address (mod, &pc2); // map pc2 also > + if (last_pc < low_addr || last_pc > high_addr) > + { > + if (verbose) > + clog << format(N_("{}: Skipping pc={:x} raw_pc={:x} outside > module range start={:x}..end={:x}\n"), > + mainfile, pc2, last_pc, low_addr, high_addr); > + return; > + } > + (void) j; > + buildid_ent->record_callgraph_arc(pc2, pc); > + } > +} > + > -- > 2.53.0 >
