Changes for v3:
- Reworked elftab to incorporate dev/ino into the caching key (to allow caching modules from different filesystems, e.g. a main filesystem and a container filesystem) and to guard more carefully against collisions.
- Add external API for implementing a custom find_elf callback that reads/writes the cache.

Changes for v2:
- Add locking for elftab. This is needed in addition to the intrinsic locking in dynamicsizehash_concurrent to avoid having cache_elf expose an incomplete dwfltracker_elf_info * entry to other threads while its data is being populated / replaced.
- Tidy dwfl_process_tracker_find_elf.c into the main find_elf callback and two functions to consider (in future) making into a public API for custom cached callbacks.

* * *

The Dwfl_Process_Tracker includes a dynamicsizehash cache which maps file paths to Elf * (or rather, dwfltracker_elf_info * storing fd and Elf *). We provide a dwfl_process_tracker_find_elf callback which checks the cache for an already-loaded Elf * and, if missing, populates the cache with the fd returned by dwfl_linux_proc_find_elf. Later, open_elf updates the cache with the Elf * for that fd. The asserts in open_elf are left commented out because they still trigger in some cases where a redundant Elf * is being created without checking the cache.

Since the Elf * outlasts the Dwfl that created it, we use the (convenient, already-existing) reference count field in Elf * to retain the data in the table. Then dwfl_end calling elf_end will decrement the refcount cleanly, and dwfl_process_tracker_end will issue another elf_end call.

* libdwfl/libdwfl.h (dwfl_process_tracker_find_elf): New function, serves as a cached version of the dwfl_linux_proc_find_elf callback.
(dwfl_process_tracker_find_cached_elf): New function.
(dwfl_process_tracker_cache_elf): New function.
(dwfl_module_gettracker): New function, gives external users a way to access Dwfl_Process_Tracker given a Dwfl_Module.
* libdwfl/libdwflP.h (dwfltracker_elf_info): New struct typedef.
(struct Dwfl_Process_Tracker): Add dynamicsizehash table of dwfltracker_elf_info structs + associated rwlock.
(INTDECLs): Add INTDECL for dwfl_process_tracker_find_cached_elf, dwfl_process_tracker_cache_elf, dwfl_module_gettracker.
* libdwfl/dwfl_process_tracker_elftab.c: New file, instantiates lib/dynamicsizehash_concurrent.c to store dwfltracker_elf_info structs. * libdwfl/dwfl_process_tracker_elftab.h: New file, ditto. * libdwfl/libdwfl_next_prime.c: New file. * libdwfl/dwfl_process_tracker.c (dwfl_process_tracker_begin): Init elftab. (dwfl_process_tracker_end): Clean up elftab. Lock and iterate the hash to free tracker->elftab.table items. * libdwfl/dwfl_process_tracker_find_elf.c: New file, implements a find_elf callback that wraps dwfl_linux_proc_find_elf with additional caching logic, and an API to access the Dwfl_Process_Tracker Elf cache when implementing a custom find_elf callback. * libdwfl/dwfl_module_getdwarf.c (open_elf): Cache file->elf in Dwfl_Process_Tracker. Must be done here as dwfl_linux_proc_find_elf opens an fd but does not yet create the Elf *. Also, increment Elf * refcount so the table retains the Elf * after caller's dwfl_end cleanup. * libdwfl/Makefile.am (libdwfl_a_SOURCES): Add dwfl_process_tracker_find_elf.c, dwfl_process_tracker_elftab.c, libdwfl_next_prime.c. (noinst_HEADERS): Add dwfl_process_tracker_elftab.h. * libdw/libdw.map: Add dwfl_process_tracker_find_elf, dwfl_process_tracker_find_cached_elf, dwfl_process_tracker_cache_elf, dwfl_module_gettracker. 
--- libdw/libdw.map | 4 + libdwfl/Makefile.am | 7 +- libdwfl/dwfl_module_getdwarf.c | 25 +++- libdwfl/dwfl_process_tracker.c | 25 ++++ libdwfl/dwfl_process_tracker_elftab.c | 47 ++++++ libdwfl/dwfl_process_tracker_elftab.h | 40 +++++ libdwfl/dwfl_process_tracker_find_elf.c | 191 ++++++++++++++++++++++++ libdwfl/libdwfl.h | 41 +++++ libdwfl/libdwflP.h | 21 ++- libdwfl/libdwfl_next_prime.c | 6 + 10 files changed, 402 insertions(+), 5 deletions(-) create mode 100644 libdwfl/dwfl_process_tracker_elftab.c create mode 100644 libdwfl/dwfl_process_tracker_elftab.h create mode 100644 libdwfl/dwfl_process_tracker_find_elf.c create mode 100644 libdwfl/libdwfl_next_prime.c diff --git a/libdw/libdw.map b/libdw/libdw.map index a34ba339..77be3906 100644 --- a/libdw/libdw.map +++ b/libdw/libdw.map @@ -394,4 +394,8 @@ ELFUTILS_0.193 { dwfl_process_tracker_begin; dwfl_begin_with_tracker; dwfl_process_tracker_end; + dwfl_process_tracker_find_cached_elf; + dwfl_process_tracker_cache_elf; + dwfl_module_gettracker; + dwfl_process_tracker_find_elf; } ELFUTILS_0.192; diff --git a/libdwfl/Makefile.am b/libdwfl/Makefile.am index b41122e3..dd99db46 100644 --- a/libdwfl/Makefile.am +++ b/libdwfl/Makefile.am @@ -71,8 +71,9 @@ libdwfl_a_SOURCES = dwfl_begin.c dwfl_end.c dwfl_error.c dwfl_version.c \ link_map.c core-file.c open.c image-header.c \ dwfl_frame.c frame_unwind.c dwfl_frame_pc.c \ linux-pid-attach.c linux-core-attach.c dwfl_frame_regs.c \ - dwfl_process_tracker.c \ - dwfl_perf_frame.c \ + dwfl_process_tracker.c dwfl_process_tracker_find_elf.c \ + dwfl_process_tracker_elftab.c libdwfl_next_prime.c \ + dwfl_perf_frame.c \ gzip.c debuginfod-client.c if BZLIB @@ -94,7 +95,7 @@ libeu = ../lib/libeu.a libdwfl_pic_a_SOURCES = am_libdwfl_pic_a_OBJECTS = $(libdwfl_a_SOURCES:.c=.os) -noinst_HEADERS = libdwflP.h +noinst_HEADERS = libdwflP.h dwfl_process_tracker_elftab.h EXTRA_libdwfl_a_DEPENDENCIES = libdwfl.manifest diff --git a/libdwfl/dwfl_module_getdwarf.c b/libdwfl/dwfl_module_getdwarf.c 
index 6f98c02b..2518c3aa 100644 --- a/libdwfl/dwfl_module_getdwarf.c +++ b/libdwfl/dwfl_module_getdwarf.c @@ -1,5 +1,5 @@ /* Find debugging and symbol information for a module in libdwfl. - Copyright (C) 2005-2012, 2014, 2015 Red Hat, Inc. + Copyright (C) 2005-2012, 2014, 2015, 2025 Red Hat, Inc. This file is part of elfutils. This file is free software; you can redistribute it and/or modify @@ -79,6 +79,29 @@ open_elf (Dwfl_Module *mod, struct dwfl_file *file) if (error != DWFL_E_NOERROR) return error; + /* Cache file->elf in Dwfl_Process_Tracker if available: */ + if (mod->dwfl->tracker != NULL && file->name != NULL) + { + rwlock_wrlock (&mod->dwfl->tracker->elftab_lock); + dwfltracker_elf_info *ent = dwfltracker_elftab_find (&mod->dwfl->tracker->elftab, elf_hash(file->name)); + if (ent != NULL) + { + /* TODO(REVIEW): The following assertions are still + triggered on certain code paths that acquire fds or + create Elf structs without checking the caching mechanism + first. This is not a serious problem, and can be fixed + gradually. */ + + /* assert(ent->elf == NULL || ent->elf == file->elf); */ /* Guard against redundant/leaked Elf *. */ + /* assert(ent->fd == file->fd); */ /* Guard against redundant open. 
*/ + + ent->elf = file->elf; + /* XXX Dwfl_Process_Tracker also holds the Elf * jointly with the caller: */ + ent->elf->ref_count++; + } + rwlock_unlock (&mod->dwfl->tracker->elftab_lock); + } + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (file->elf, &ehdr_mem); if (ehdr == NULL) { diff --git a/libdwfl/dwfl_process_tracker.c b/libdwfl/dwfl_process_tracker.c index c42d8ad2..24ee2da7 100644 --- a/libdwfl/dwfl_process_tracker.c +++ b/libdwfl/dwfl_process_tracker.c @@ -32,6 +32,8 @@ #include "libdwflP.h" +#define HTAB_DEFAULT_SIZE 1021 + Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callbacks) { Dwfl_Process_Tracker *tracker = calloc (1, sizeof *tracker); @@ -41,6 +43,9 @@ Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callback return tracker; } + dwfltracker_elftab_init (&tracker->elftab, HTAB_DEFAULT_SIZE); + rwlock_init (tracker->elftab_lock); + tracker->callbacks = callbacks; return tracker; } @@ -61,6 +66,26 @@ void dwfl_process_tracker_end (Dwfl_Process_Tracker *tracker) if (tracker == NULL) return; + /* HACK to allow iteration of dynamicsizehash_concurrent. */ + /* XXX Based on lib/dynamicsizehash_concurrent.c free(). */ + rwlock_fini (tracker->elftab_lock); + pthread_rwlock_destroy(&tracker->elftab.resize_rwl); + for (size_t idx = 1; idx <= tracker->elftab.size; idx++) + { + dwfltracker_elftab_ent *ent = &tracker->elftab.table[idx]; + if (ent->hashval == 0) + continue; + dwfltracker_elf_info *t = (dwfltracker_elf_info *) atomic_load_explicit (&ent->val_ptr, + memory_order_relaxed); + free(t->module_name); + if (t->fd >= 0) + close(t->fd); + if (t->elf != NULL) + elf_end(t->elf); + free(t); /* TODO: Check necessity. */ + } + free (tracker->elftab.table); + /* TODO: Call dwfl_end for each Dwfl connected to this tracker. 
*/ free (tracker); } diff --git a/libdwfl/dwfl_process_tracker_elftab.c b/libdwfl/dwfl_process_tracker_elftab.c new file mode 100644 index 00000000..5addb617 --- /dev/null +++ b/libdwfl/dwfl_process_tracker_elftab.c @@ -0,0 +1,47 @@ +/* Dwfl_Process_Tracker Elf table implementation. + Copyright (C) 2025 Red Hat, Inc. + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <string.h> + +#include <libdwflP.h> + +/* Definitions for the Elf table. */ +#define TYPE dwfltracker_elf_info * +#define NAME dwfltracker_elftab +#define ITERATE 1 +/* TODO(REVIEW): Omit reverse? */ +#define REVERSE 1 +#define COMPARE(a, b) \ + (strcmp ((a)->module_name, (b)->module_name) \ + && (a)->dev == (b)->dev && (a)->ino == (b)->ino) + +#include "../lib/dynamicsizehash_concurrent.c" diff --git a/libdwfl/dwfl_process_tracker_elftab.h b/libdwfl/dwfl_process_tracker_elftab.h new file mode 100644 index 00000000..d78a7394 --- /dev/null +++ b/libdwfl/dwfl_process_tracker_elftab.h @@ -0,0 +1,40 @@ +/* Dwfl_Process_Tracker Elf table. + Copyright (C) 2025 Red Hat, Inc. 
+ This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see <http://www.gnu.org/licenses/>. */ + +#ifndef DWFL_PROCESS_TRACKER_ELFTAB_H +#define DWFL_PROCESS_TRACKER_ELFTAB_H 1 + +/* Definitions for the Elf table. */ +#define TYPE dwfltracker_elf_info * +#define NAME dwfltracker_elftab +#define ITERATE 1 +#define COMPARE(a, b) \ + strcmp ((a)->module_name, (b)->module_name) +#include <dynamicsizehash_concurrent.h> + +#endif diff --git a/libdwfl/dwfl_process_tracker_find_elf.c b/libdwfl/dwfl_process_tracker_find_elf.c new file mode 100644 index 00000000..932e905d --- /dev/null +++ b/libdwfl/dwfl_process_tracker_find_elf.c @@ -0,0 +1,191 @@ +/* Find Elf file from dwfl_linux_proc_report, cached via Dwfl_Process_Tracker. + Copyright (C) 2025, Red Hat, Inc. + This file is part of elfutils. 
+ + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see <http://www.gnu.org/licenses/>. */ + + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <sys/stat.h> +#include "../libelf/libelfP.h" +/* XXX: Private header needed for Elf * ref_count field. */ +/* TODO: Consider dup_elf() rather than direct ref_count access. */ + +#include "libdwflP.h" + +unsigned long int +elf_info_hash (const char *module_name, dev_t st_dev, ino_t st_ino) +{ + unsigned long int hval = elf_hash(module_name); + hval ^= (unsigned long int)st_dev; + hval ^= (unsigned long int)st_ino; + return hval; +} + +int +dwfl_process_tracker_find_cached_elf (Dwfl_Process_Tracker *tracker, + const char *module_name, + const char *module_path, + char **file_name, Elf **elfp) +{ + dwfltracker_elf_info *ent = NULL; + int rc; + struct stat sb; + + rc = stat(module_path, &sb); + if (rc < 0) + return -1; + unsigned long int hval = elf_info_hash(module_name, sb.st_dev, sb.st_ino); + + rwlock_rdlock(tracker->elftab_lock); + ent = dwfltracker_elftab_find(&tracker->elftab, hval); + rwlock_unlock(tracker->elftab_lock); + + /* Guard against collisions. 
+ TODO: Need proper chaining, dynamicsizehash_concurrent isn't really + equipped for it. */ + if (ent == NULL || !strcmp (module_name, ent->module_name) + || ent->dev != sb.st_dev || ent->ino != sb.st_ino) + return -1; + + /* Verify that ent->fd has not been updated: */ + rc = fstat(ent->fd, &sb); + if (rc < 0 || ent->dev != sb.st_dev || ent->ino != sb.st_ino + || ent->last_mtime != sb.st_mtime) + return -1; + + if (ent->elf != NULL) + ent->elf->ref_count++; + *elfp = ent->elf; + *file_name = strdup(ent->module_name); + return ent->fd; +} +INTDEF(dwfl_process_tracker_find_cached_elf) + +bool +dwfl_process_tracker_cache_elf (Dwfl_Process_Tracker *tracker, + const char *module_name, + const char *file_name __attribute__((unused)), + Elf *elf, int fd) +{ + dwfltracker_elf_info *ent = NULL; + int rc; + struct stat sb; + + rc = fstat(fd, &sb); + if (rc < 0) + return false; + unsigned long int hval = elf_info_hash(module_name, sb.st_dev, sb.st_ino); + + rwlock_wrlock(tracker->elftab_lock); + ent = dwfltracker_elftab_find(&tracker->elftab, hval); + /* Guard against collisions. + TODO: Need proper chaining, dynamicsizehash_concurrent isn't really + equipped for it. */ + if (ent != NULL && (!strcmp (module_name, ent->module_name) + || ent->dev != sb.st_dev || ent->ino != sb.st_ino)) + { + rwlock_unlock(tracker->elftab_lock); + return false; + } + if (ent == NULL) + { + ent = calloc (1, sizeof (dwfltracker_elf_info)); + ent->module_name = strdup(module_name); + + if (dwfltracker_elftab_insert(&tracker->elftab, hval, ent) != 0) + { + free(ent->module_name); + free(ent); + rwlock_unlock(tracker->elftab_lock); + assert(false); /* Should not occur due to the wrlock on elftab. */ + } + } + else + { + /* Safe to replace the existing elf, keep module_name. */ + if (ent->elf != NULL) + elf_end(ent->elf); + return true; + } + if (elf != NULL) + elf->ref_count++; + ent->elf = elf; + ent->fd = fd; + if (rc == 0) /* TODO(REVIEW): Report rc != 0 via errno? 
*/ + { + ent->dev = sb.st_dev; + ent->ino = sb.st_ino; + ent->last_mtime = sb.st_mtime; + } + rwlock_unlock(tracker->elftab_lock); + return true; +} +INTDEF(dwfl_process_tracker_cache_elf) + +Dwfl_Process_Tracker * +dwfl_module_gettracker (Dwfl_Module *mod) +{ + if (mod->dwfl == NULL) + return NULL; + return mod->dwfl->tracker; +} +INTDEF(dwfl_module_gettracker) + +int +dwfl_process_tracker_find_elf (Dwfl_Module *mod, + void **userdata __attribute__ ((unused)), + const char *module_name, Dwarf_Addr base, + char **file_name, Elf **elfp) +{ + /* TODO(REVIEW): Assuming this isn't called with elfp already set. */ + assert (*elfp == NULL); + + Dwfl_Process_Tracker *tracker = INTUSE(dwfl_module_gettracker) (mod); + int fd; + + if (tracker != NULL) + { + fd = INTUSE(dwfl_process_tracker_find_cached_elf) + (tracker, module_name, module_name, + file_name, elfp); + if (fd >= 0) + return fd; + } + + fd = INTUSE(dwfl_linux_proc_find_elf) (mod, userdata, module_name, + base, file_name, elfp); + + if (tracker != NULL && fd >= 0 && *file_name != NULL) + { + INTUSE(dwfl_process_tracker_cache_elf) + (tracker, module_name, + *file_name, *elfp, fd); + } + return fd; +} diff --git a/libdwfl/libdwfl.h b/libdwfl/libdwfl.h index 0db5b74d..d6b3b423 100644 --- a/libdwfl/libdwfl.h +++ b/libdwfl/libdwfl.h @@ -133,6 +133,40 @@ extern Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *c extern Dwfl *dwfl_begin_with_tracker (Dwfl_Process_Tracker *tracker) __nonnull_attribute__ (1); +/* Find the Dwfl corresponding to PID. If CALLBACK is non-NULL + and the Dwfl has not been created, invoke CALLBACK to create + the Dwfl and then store it in the tracker. */ +extern Dwfl *dwfl_process_tracker_find_pid (Dwfl_Process_Tracker *tracker, + pid_t pid, + Dwfl *(*callback) (Dwfl_Process_Tracker *tracker, + pid_t pid, + void *arg), + void *arg) + __nonnull_attribute__ (1); + +/* Try to find a cached Elf corresponding to MODULE_NAME. 
Verifies + that the cached Elf has dev/ino/mtime matching the file on disk. + If MODULE_PATH is non-NULL, it gives an alternate location for the + module e.g. /proc/PID/root/MODULE_NAME. Stores FILE_NAME and ELFP + values. Returns fd similar to the find_elf callbacks, or -1 if + cached Elf was not found. */ +extern int dwfl_process_tracker_find_cached_elf (Dwfl_Process_Tracker *tracker, + const char *module_name, + const char *module_path, + char **file_name, Elf **elfp); + +/* Record a cached Elf corresponding to MODULE_NAME. FILE_NAME and FD + values must be provided, similar to the output of a find_elf callback. + Returns TRUE iff the Elf was successfully stored in the cache. */ +extern bool dwfl_process_tracker_cache_elf (Dwfl_Process_Tracker *tracker, + const char *module_name, + const char *file_name, + Elf *elf, int fd); + +/* For implementing a find_elf callback based on the prior two functions. + Returns the Dwfl_Process_Tracker corresponding to MOD. */ +extern Dwfl_Process_Tracker *dwfl_module_gettracker (Dwfl_Module *mod); + /* End a multi-process session. */ extern void dwfl_process_tracker_end (Dwfl_Process_Tracker *tracker); @@ -409,6 +443,13 @@ extern int dwfl_linux_proc_find_elf (Dwfl_Module *mod, void **userdata, const char *module_name, Dwarf_Addr base, char **file_name, Elf **); +/* The same callback, except this first attempts to look up a cached + Elf* and fd from the Dwfl_Module's Dwfl_Process_Tracker (if any). + If a new Elf* has to be created, this saves it to the cache. */ +extern int dwfl_process_tracker_find_elf (Dwfl_Module *mod, void **userdata, + const char *module_name, Dwarf_Addr base, + char **file_name, Elf **); + /* Standard argument parsing for using a standard callback set. 
*/ struct argp; extern const struct argp *dwfl_standard_argp (void) __const_attribute__; diff --git a/libdwfl/libdwflP.h b/libdwfl/libdwflP.h index 885acae0..2c6b1669 100644 --- a/libdwfl/libdwflP.h +++ b/libdwfl/libdwflP.h @@ -101,12 +101,28 @@ typedef enum { DWFL_ERRORS DWFL_E_NUM } Dwfl_Error; extern int __libdwfl_canon_error (Dwfl_Error) internal_function; extern void __libdwfl_seterrno (Dwfl_Error) internal_function; +/* Hash table for Elf *. */ +typedef struct +{ + char *module_name; /* dwfltracker_elftab_ent is used iff non-NULL. */ + int fd; + Elf *elf; + dev_t dev; + ino_t ino; + time_t last_mtime; +} dwfltracker_elf_info; +#include "dwfl_process_tracker_elftab.h" + struct Dwfl_Process_Tracker { const Dwfl_Callbacks *callbacks; - /* ... */ + + /* Table of cached Elf * including fd, path, fstat info. */ + dwfltracker_elftab elftab; + rwlock_define(, elftab_lock); }; + /* Resources we might keep for the user about the core file that the Dwfl might have been created from. Can currently only be set through std-argp. */ @@ -782,6 +798,9 @@ INTDECL (dwfl_module_getsymtab) INTDECL (dwfl_module_getsymtab_first_global) INTDECL (dwfl_module_getsrc) INTDECL (dwfl_module_report_build_id) +INTDECL (dwfl_module_gettracker) +INTDECL (dwfl_process_tracker_find_cached_elf) +INTDECL (dwfl_process_tracker_cache_elf) INTDECL (dwfl_report_elf) INTDECL (dwfl_report_begin) INTDECL (dwfl_report_begin_add) diff --git a/libdwfl/libdwfl_next_prime.c b/libdwfl/libdwfl_next_prime.c new file mode 100644 index 00000000..f99d4c6c --- /dev/null +++ b/libdwfl/libdwfl_next_prime.c @@ -0,0 +1,6 @@ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#define next_prime attribute_hidden __libdwfl_next_prime +#include "../lib/next_prime.c" -- 2.47.0