Hi Serhei,

On Sun, 2025-03-16 at 19:12 -0400, Serhei Makarov wrote:
> First patch of a series that reworks eu-stacktrace functionality
> into a library interface for other profiling tools.
> 
> * * *
> 
> As it happens, Linux perf_events and DWARF can prescribe completely
> different layouts for the register file, requiring non-obvious code
> for translation. This makes sense to put in libebl if we want
> profilers to handle perf sample data with elfutils.
> 
> Start with the x86_64 implementation taken from eu-stacktrace. The
> code has been generalized to accept other perf register masks besides
> the 'preferred' one.
> 
> * backends/Makefile.am (x86_64_SRCS): Add x86_64_initreg_sample.c.
> * backends/libebl_PERF_FLAGS.h: New file.
> * backends/x86_64_initreg_sample.c: New file, implements a generalized
>   version of the eu-stacktrace register shuffling for x86_64.
> * backends/x86_64_init.c (x86_64_init): Add initialization for
>   set_initial_registers_sample, perf_frame_regs_mask.
> * libebl/Makefile.am (libebl_a_SOURCES): Add eblinitreg_sample.c.
> * libebl/ebl-hooks.h (set_initial_registers_sample): New hook.
> * libebl/eblinitreg_sample.c: New file, implements ebl interface to
>   set_initial_registers_sample backend hook.
> * libebl/libebl.h (ebl_set_initial_registers_sample): New function.
>   (ebl_perf_frame_regs_mask): New function.
> * libebl/libeblP.h (struct ebl): Add perf_frame_regs_mask field
>   giving the preferred register mask.

Just some random observations, I don't believe I really understand
where this is going yet. Will have to read the rest of the patchset
first.
> +
> +#if defined(_ASM_X86_PERF_REGS_H)
> +/* XXX See the code in x86_64_initreg.c for the list of required regs
> +   and linux arch/.../include/asm/ptrace.h for matching pt_regs struct.  */
> +#define REG(R) (1ULL << PERF_REG_X86_ ## R)
> +/* XXX FLAGS and segment regs are excluded from the following masks: */
> +#define PERF_FRAME_REGISTERS_I386 (REG(AX) | REG(BX) | REG(CX) | REG(DX) \
> +  | REG(SI) | REG(DI) | REG(BP) | REG(SP) | REG(IP))
> +#define PERF_FRAME_REGISTERS_X86_64 (PERF_FRAME_REGISTERS_I386 | REG(R8) \
> +  | REG(R9) | REG(R10) | REG(R11) | REG(R12) | REG(R13) | REG(R14) | 
> REG(R15))
> +/* XXX Register ordering defined in linux 
> arch/x86/include/uapi/asm/perf_regs.h;
> +   see the code in tools/perf/util/intel-pt.c intel_pt_add_gp_regs()
> +   and note how regs are added in the same order as the perf_regs.h enum.  */
> +#else
> +/* Since asm/perf_regs.h is for a different arch, we can't unwind x86_64 
> frames.  */
> +#define PERF_FRAME_REGISTERS_X86_64 0
> +#endif
> +
> +#endif       /* libebl_PERF_FLAGS.h */

Why the XXX?
The PERF_FRAME_REGISTERS_I386 naming is a little confusing imho.
These don't seem to be the i386 registers, but the 64bit x86_64 ones.

> diff --git a/backends/x86_64_init.c b/backends/x86_64_init.c
> index be965fa6..2e6d1df1 100644
> --- a/backends/x86_64_init.c
> +++ b/backends/x86_64_init.c
> @@ -1,5 +1,5 @@
>  /* Initialization of x86-64 specific backend library.
> -   Copyright (C) 2002-2009, 2013, 2018 Red Hat, Inc.
> +   Copyright (C) 2002-2009, 2013, 2018, 2025 Red Hat, Inc.
>     Copyright (C) H.J. Lu <hjl.to...@gmail.com>, 2015.
>     This file is part of elfutils.
>     Written by Ulrich Drepper <drep...@redhat.com>, 2002.
> @@ -35,6 +35,7 @@
>  #define BACKEND              x86_64_
>  #define RELOC_PREFIX R_X86_64_
>  #include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
>  
>  /* This defines the common reloc hooks based on x86_64_reloc.def.  */
>  #include "common-reloc.c"
> @@ -62,6 +63,8 @@ x86_64_init (Elf *elf __attribute__ ((unused)),
>    /* gcc/config/ #define DWARF_FRAME_REGISTERS.  */
>    eh->frame_nregs = 17;
>    HOOK (eh, set_initial_registers_tid);
> +  HOOK (eh, set_initial_registers_sample);
> +  eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_X86_64;
>    HOOK (eh, unwind);
>    HOOK (eh, check_reloc_target_type);
>  
> diff --git a/backends/x86_64_initreg_sample.c 
> b/backends/x86_64_initreg_sample.c
> new file mode 100644
> index 00000000..182f8197
> --- /dev/null
> +++ b/backends/x86_64_initreg_sample.c
> @@ -0,0 +1,107 @@
> +/* Populate process registers from a linux perf_events sample.
> +   Copyright (C) 2025 Red Hat, Inc.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of either
> +
> +     * the GNU Lesser General Public License as published by the Free
> +       Software Foundation; either version 3 of the License, or (at
> +       your option) any later version
> +
> +   or
> +
> +     * the GNU General Public License as published by the Free
> +       Software Foundation; either version 2 of the License, or (at
> +       your option) any later version
> +
> +   or both in parallel, as here.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received copies of the GNU General Public License and
> +   the GNU Lesser General Public License along with this program.  If
> +   not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#if defined(__x86_64__) && defined(__linux__)
> +# include <linux/perf_event.h>
> +# include <asm/perf_regs.h>
> +#endif
> +
> +#define BACKEND x86_64_
> +#include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
> +
> +bool
> +x86_64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
> +                                  uint64_t regs_mask, uint32_t abi,
> +                                  ebl_tid_registers_t *setfunc,
> +                                  void *arg)
> +{
> +#if !defined(__x86_64__) || !defined(__linux__)
> +  return false;
> +#else /* __x86_64__ */
> +  /* The following facts are needed to translate x86 registers correctly:
> +     - perf register order seen in linux 
> arch/x86/include/uapi/asm/perf_regs.h
> +       The registers array is built in the same order as the enum!
> +       (See the code in tools/perf/util/intel-pt.c intel_pt_add_gp_regs().)
> +     - EBL PERF_FRAME_REGS_MASK specifies all registers except segment and
> +       flags.  However, regs_mask might be a different set of registers.
> +       Again, regs_mask bits are in asm/perf_regs.h enum order.
> +     - dwarf register order seen in elfutils backends/{x86_64,i386}_initreg.c
> +       (matching pt_regs struct in linux arch/x86/include/asm/ptrace.h)
> +       and it's a fairly different register order!
> +
> +     For comparison, you can study 
> codereview.qt-project.org/gitweb?p=qt-creator/perfparser.git;a=blob;f=app/perfregisterinfo.cpp;hb=HEAD
> +     and follow the code which uses those tables of magic numbers.
> +     But it's better to follow original sources of truth for this.  */
> +
> +  bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32);

Why care about the ABI here?
Wouldn't the hook go to i386_set_initial_registers_sample for the 32
bit abi?

> +
> +  /* Locations of dwarf_regs in the perf_event_x86_regs enum order,
> +     not the regs[i] array (which will include a subset of the regs): */
> +  static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/};
> +  static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/,
> +                                 16/*r8 after flags+segment*/, 17, 18, 19, 
> 20, 21, 22, 23,
> +                                 8/*ip*/};
> +  const int *dwarf_to_perf = is_abi32 ? regs_i386 : regs_x86_64;
> +
> +  /* Locations of perf_regs in the regs[] array, according to regs_mask: */
> +  int perf_to_regs[PERF_REG_X86_64_MAX];
> +  uint64_t expected_mask = is_abi32 ? PERF_FRAME_REGISTERS_I386 : 
> PERF_FRAME_REGISTERS_X86_64;
> +  int j, k; uint64_t bit;
> +  /* TODO(REVIEW): Is it worth caching this perf_to_regs computation
> +     as long as regs_mask is kept the same across repeated calls? */
> +  for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
> +    {
> +      if ((bit & expected_mask) && (bit & regs_mask)) {
> +     if (n_regs <= (uint32_t)j)
> +       return false; /* regs_mask count doesn't match n_regs */
> +     perf_to_regs[k] = j;
> +     j++;
> +      } else {
> +     perf_to_regs[k] = -1;
> +      }
> +    }
> +
> +  int expected_regs = is_abi32 ? 9 : 17; /* cf eh->frame_nregs, 
> {i386,x86_64}_init.c */

So same question here, you should be able to use eh->frame_nregs
directly.

> +  Dwarf_Word dwarf_regs[17];
> +  for (int i = 0; i < expected_regs; i++)
> +    {
> +      k = dwarf_to_perf[i];
> +      j = perf_to_regs[k];
> +      if (j < 0) continue;
> +      if (n_regs <= (uint32_t)j) continue;
> +      dwarf_regs[i] = regs[j];
> +    }
> +  return setfunc (0, 17, dwarf_regs, arg);

instead of 17 in both these places, you can then just use expected_regs
(or eh->frame_nregs). I think the last one certainly should be
expected_regs.

> +#endif /* __x86_64__ */
> +}
> diff --git a/libebl/Makefile.am b/libebl/Makefile.am
> index ea092b5a..3df12ce2 100644
> --- a/libebl/Makefile.am
> +++ b/libebl/Makefile.am
> @@ -1,6 +1,6 @@
>  ## Process this file with automake to create Makefile.in
>  ##
> -## Copyright (C) 2000-2010, 2013, 2016, 2017 Red Hat, Inc.
> +## Copyright (C) 2000-2010, 2013, 2016, 2017, 2025 Red Hat, Inc.
>  ## This file is part of elfutils.
>  ##
>  ## This file is free software; you can redistribute it and/or modify
> @@ -51,7 +51,7 @@ libebl_a_SOURCES = eblopenbackend.c eblclosebackend.c 
> eblreloctypename.c \
>                  eblbsspltp.c eblretval.c eblreginfo.c eblnonerelocp.c \
>                  eblrelativerelocp.c eblsysvhashentrysize.c eblauxvinfo.c \
>                  eblcheckobjattr.c ebl_check_special_section.c \
> -                eblabicfi.c eblstother.c eblinitreg.c \
> +                eblabicfi.c eblstother.c eblinitreg.c eblinitreg_sample.c \
>                  ebldwarftoregno.c eblnormalizepc.c eblunwind.c \
>                  eblresolvesym.c eblcheckreloctargettype.c \
>                  ebl_data_marker_symbol.c
> diff --git a/libebl/ebl-hooks.h b/libebl/ebl-hooks.h
> index d6437e53..f0475cdf 100644
> --- a/libebl/ebl-hooks.h
> +++ b/libebl/ebl-hooks.h
> @@ -1,5 +1,5 @@
>  /* Backend hook signatures internal interface for libebl.
> -   Copyright (C) 2000-2011, 2013, 2014, 2016, 2017 Red Hat, Inc.
> +   Copyright (C) 2000-2011, 2013, 2014, 2016, 2017, 2025 Red Hat, Inc.
>     This file is part of elfutils.
>  
>     This file is free software; you can redistribute it and/or modify
> @@ -158,6 +158,14 @@ bool EBLHOOK(set_initial_registers_tid) (pid_t tid,
>                                        ebl_tid_registers_t *setfunc,
>                                        void *arg);
>  
> +/* Set process data from a perf_events sample and call SETFUNC one or more 
> times.
> +   Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0, 
> otherwise the
> +   backend doesn't support unwinding from perf_events data.  */
> +bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, uint32_t 
> n_regs,
> +                                         uint64_t regs_mask, uint32_t abi,
> +                                         ebl_tid_registers_t *setfunc,
> +                                         void *arg);
> +
>  /* Convert *REGNO as is in DWARF to a lower range suitable for
>     Dwarf_Frame->REGS indexing.  */
>  bool EBLHOOK(dwarf_to_regno) (Ebl *ebl, unsigned *regno);
> diff --git a/libebl/eblinitreg_sample.c b/libebl/eblinitreg_sample.c
> new file mode 100644
> index 00000000..2a387b7a
> --- /dev/null
> +++ b/libebl/eblinitreg_sample.c
> @@ -0,0 +1,54 @@
> +/* Populate process Dwfl_Frame from perf_events sample.
> +
> +   Copyright (C) 2025 Red Hat, Inc.
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of either
> +
> +     * the GNU Lesser General Public License as published by the Free
> +       Software Foundation; either version 3 of the License, or (at
> +       your option) any later version
> +
> +   or
> +
> +     * the GNU General Public License as published by the Free
> +       Software Foundation; either version 2 of the License, or (at
> +       your option) any later version
> +
> +   or both in parallel, as here.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received copies of the GNU General Public License and
> +   the GNU Lesser General Public License along with this program.  If
> +   not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <libeblP.h>
> +#include <assert.h>
> +
> +bool
> +ebl_set_initial_registers_sample (Ebl *ebl,
> +                               const Dwarf_Word *regs, uint32_t n_regs,
> +                               uint64_t regs_mask, uint32_t abi,
> +                               ebl_tid_registers_t *setfunc,
> +                               void *arg)
> +{
> +  /* If set_initial_registers_sample is unsupported then 
> PERF_FRAME_REGS_MASK is zero.  */
> +  assert (ebl->set_initial_registers_sample != NULL);
> +  return ebl->set_initial_registers_sample (regs, n_regs, regs_mask, abi, 
> setfunc, arg);
> +}
> +
> +uint64_t
> +ebl_perf_frame_regs_mask (Ebl *ebl)
> +{
> +  /* ebl is declared NN */
> +  return ebl->perf_frame_regs_mask;
> +}
> diff --git a/libebl/libebl.h b/libebl/libebl.h
> index 731001d3..d87efeb5 100644
> --- a/libebl/libebl.h
> +++ b/libebl/libebl.h
> @@ -1,5 +1,5 @@
>  /* Interface for libebl.
> -   Copyright (C) 2000-2010, 2013, 2014, 2015, 2016, 2017 Red Hat, Inc.
> +   Copyright (C) 2000-2010, 2013, 2014, 2015, 2016, 2017, 2025 Red Hat, Inc.
>     This file is part of elfutils.
>  
>     This file is free software; you can redistribute it and/or modify
> @@ -340,6 +340,22 @@ extern bool ebl_set_initial_registers_tid (Ebl *ebl,
>  extern size_t ebl_frame_nregs (Ebl *ebl)
>    __nonnull_attribute__ (1);
>  
> +/* Callback to set process data from a linux perf_events sample.
> +   EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, otherwise the
> +   backend doesn't support unwinding from perf_events sample data.  */
> +extern bool ebl_set_initial_registers_sample (Ebl *ebl,
> +                                           const Dwarf_Word *regs, uint32_t 
> n_regs,
> +                                           uint64_t regs_mask, uint32_t abi,
> +                                           ebl_tid_registers_t *setfunc,
> +                                           void *arg)
> +  __nonnull_attribute__ (1, 2, 6);
> +
> +/* Preferred sample_regs_user mask to request from linux perf_events
> +   to allow unwinding on EBL architecture.  Omitting some of these
> +   registers may result in failed or inaccurate unwinding. */
> +extern uint64_t ebl_perf_frame_regs_mask (Ebl *ebl)
> +  __nonnull_attribute__ (1);
> +
>  /* Offset to apply to the value of the return_address_register, as
>     fetched from a Dwarf CFI.  This is used by some backends, where the
>     return_address_register actually contains the call address.  */
> diff --git a/libebl/libeblP.h b/libebl/libeblP.h
> index c408ed97..29c2402d 100644
> --- a/libebl/libeblP.h
> +++ b/libebl/libeblP.h
> @@ -1,5 +1,5 @@
>  /* Internal definitions for interface for libebl.
> -   Copyright (C) 2000-2009, 2013, 2014 Red Hat, Inc.
> +   Copyright (C) 2000-2009, 2013, 2014, 2025 Red Hat, Inc.
>     This file is part of elfutils.
>  
>     This file is free software; you can redistribute it and/or modify
> @@ -60,6 +60,11 @@ struct ebl
>       Ebl architecture can unwind iff FRAME_NREGS > 0.  */
>    size_t frame_nregs;
>  
> +  /* Preferred sample_regs_user mask to request from linux perf_events
> +     to allow unwinding.  Ebl architecture supports unwinding from
> +     perf_events sample data iff PERF_FRAME_REGS_MASK > 0.  */
> +  uint32_t perf_frame_regs_mask;
> +

In all other places the mask is an uint64_t

Also the libebl interface is very verbose for this mask.
Clearly copied from how frame_nregs is done.
So fine for now. But we might want to look into something simpler for
both of these. Both fetching frame_nregs and perf_frame_regs_mask feel
very inefficient.

Cheers,

Mark

Reply via email to