For parallel applications that need the information in the DIEs, the
Dwarf_Abbrev hash table et al. become a massive data race. This fixes that by:

1. Adding atomics & locks to the hash table to manage concurrency
  (lib/dynamicsizehash_concurrent.{c,h})
2. Adding a lock & array structure to the memory manager (pseudo-TLS)
  (libdwP.h, libdw_alloc.c)
3. Adding extra configure options for Helgrind/DRD annotations
  (configure.ac)
4. Including "stdatomic.h" from FreeBSD, to support C11-style atomics.
  (lib/stdatomic.h)

Signed-off-by: Jonathon Anderson <jm...@rice.edu>
Signed-off-by: Srđan Milaković <sm...@rice.edu>
---

Notes:
- GCC >= 4.9 provides <stdatomic.h> natively; for those versions
  lib/stdatomic.h could be removed or disabled. We can also rewrite the
  file if the copyright becomes an issue.
- Currently the concurrent hash table is always enabled, performance-wise there is no known difference between it and the non-concurrent version. This can be changed to toggle with --enable-thread-safety if preferred. - Another implementation of #2 above might use dynamic TLS (pthread_key_*),
  we chose this implementation to reduce the overall complexity.
  This can also be bound to --enable-thread-safety if preferred.

ChangeLog                        |   5 +
configure.ac                     |  30 ++
lib/ChangeLog                    |   6 +
lib/Makefile.am                  |   5 +-
lib/dynamicsizehash_concurrent.c | 522 +++++++++++++++++++++++++++++++
lib/dynamicsizehash_concurrent.h | 118 +++++++
lib/stdatomic.h                  | 442 ++++++++++++++++++++++++++
libdw/ChangeLog                  |   9 +
libdw/Makefile.am                |   2 +-
libdw/dwarf_abbrev_hash.c        |   2 +-
libdw/dwarf_abbrev_hash.h        |   2 +-
libdw/dwarf_begin_elf.c          |  13 +-
libdw/dwarf_end.c                |  24 +-
libdw/libdwP.h                   |  13 +-
libdw/libdw_alloc.c              |  70 ++++-
15 files changed, 1240 insertions(+), 23 deletions(-)
create mode 100644 lib/dynamicsizehash_concurrent.c
create mode 100644 lib/dynamicsizehash_concurrent.h
create mode 100644 lib/stdatomic.h

diff --git a/ChangeLog b/ChangeLog
index bed3999f..93907ddd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2019-08-15  Jonathon Anderson <jm...@rice.edu>
+
+       * configure.ac: Add new --enable-valgrind-annotations
+       * configure.ac: Add new --with-valgrind (headers only)
+
2019-08-13  Mark Wielaard  <m...@klomp.org>

        * configure.ac: Set version to 0.177.
diff --git a/configure.ac b/configure.ac
index c443fa3b..c5406b44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -323,6 +323,35 @@ if test "$use_valgrind" = yes; then
fi
AM_CONDITIONAL(USE_VALGRIND, test "$use_valgrind" = yes)

+AC_ARG_WITH([valgrind],
+AS_HELP_STRING([--with-valgrind],[include directory for Valgrind headers]),
+[with_valgrind_headers=$withval], [with_valgrind_headers=no])
+if test "x$with_valgrind_headers" != xno; then
+    save_CFLAGS="$CFLAGS"
+    CFLAGS="$CFLAGS -I$with_valgrind_headers"
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+      #include <valgrind/valgrind.h>
+      int main() { return 0; }
+    ]])], [ HAVE_VALGRIND_HEADERS="yes"
+            CFLAGS="$save_CFLAGS -I$with_valgrind_headers" ],
+ [ AC_MSG_ERROR([invalid valgrind include directory: $with_valgrind_headers]) ])
+fi
+
+AC_ARG_ENABLE([valgrind-annotations],
+AS_HELP_STRING([--enable-valgrind-annotations],[insert extra annotations for better valgrind support]),
+[use_vg_annotations=$enableval], [use_vg_annotations=no])
+if test "$use_vg_annotations" = yes; then
+    if test "x$HAVE_VALGRIND_HEADERS" != "xyes"; then
+      AC_MSG_CHECKING([whether Valgrind headers are available])
+      AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+        #include <valgrind/valgrind.h>
+        int main() { return 0; }
+      ]])], [ AC_MSG_RESULT([yes]) ],
+ [ AC_MSG_ERROR([valgrind annotations requested but no headers are available]) ])
+    fi
+fi
+AM_CONDITIONAL(USE_VG_ANNOTATIONS, test "$use_vg_annotations" = yes)
+
AC_ARG_ENABLE([install-elfh],
AS_HELP_STRING([--enable-install-elfh],[install elf.h in include dir]),
               [install_elfh=$enableval], [install_elfh=no])
@@ -668,6 +697,7 @@ AC_MSG_NOTICE([
  OTHER FEATURES
    Deterministic archives by default  : ${default_ar_deterministic}
    Native language support            : ${USE_NLS}
+    Extra Valgrind annotations         : ${use_vg_annotations}

  EXTRA TEST FEATURES (used with make check)
    have bunzip2 installed (required)  : ${HAVE_BUNZIP2}
diff --git a/lib/ChangeLog b/lib/ChangeLog
index 7381860c..e6d08509 100644
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,9 @@
+2019-08-08  Jonathon Anderson  <jm...@rice.edu>
+
+       * dynamicsizehash_concurrent.{c,h}: New files.
+       * stdatomic.h: New file, taken from FreeBSD.
+       * Makefile.am (noinst_HEADERS): Added *.h above.
+
2019-05-03  Rosen Penev  <ros...@gmail.com>

        * color.c (parse_opt): Cast program_invocation_short_name to char *.
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 36d21a07..af7228b9 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -38,8 +38,9 @@ libeu_a_SOURCES = xstrdup.c xstrndup.c xmalloc.c next_prime.c \
                  color.c printversion.c

noinst_HEADERS = fixedsizehash.h libeu.h system.h dynamicsizehash.h list.h \
-                eu-config.h color.h printversion.h bpf.h
-EXTRA_DIST = dynamicsizehash.c
+                eu-config.h color.h printversion.h bpf.h \
+                dynamicsizehash_concurrent.h stdatomic.h
+EXTRA_DIST = dynamicsizehash.c dynamicsizehash_concurrent.c

if !GPROF
xmalloc_CFLAGS = -ffunction-sections
diff --git a/lib/dynamicsizehash_concurrent.c b/lib/dynamicsizehash_concurrent.c
new file mode 100644
index 00000000..d645b143
--- /dev/null
+++ b/lib/dynamicsizehash_concurrent.c
@@ -0,0 +1,522 @@
+/* Copyright (C) 2000-2019 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Srdan Milakovic <sm...@rice.edu>, 2019.
+   Derived from Ulrich Drepper <drep...@redhat.com>, 2000.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <system.h>
+#include <pthread.h>
+
+/* Before including this file the following macros must be defined:
+
+   NAME      name of the hash table structure.
+   TYPE      data type of the hash table entries
+   COMPARE   comparison function taking two pointers to TYPE objects
+
+   The following macros if present select features:
+
+   ITERATE   iterating over the table entries is possible
+   REVERSE   iterate in reverse order of insert
+ */
+
+
+static size_t
+lookup (NAME *htab, HASHTYPE hval, TYPE val __attribute__ ((unused)))
+{
+ /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */
+  size_t idx = 1 + (hval < htab->size ? hval : hval % htab->size);
+
+#if COMPARE != 0 /* A handful of tables don't actually compare the entries in + the table, they instead rely on the hash. In that case, we
+                    can skip parts that relate to the value. */
+  TYPE val_ptr;
+#endif
+  HASHTYPE hash;
+
+  hash = atomic_load_explicit(&htab->table[idx].hashval,
+                              memory_order_acquire);
+  if (hash == hval)
+    {
+#if COMPARE == 0
+      return idx;
+#else
+      val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr,
+                                            memory_order_acquire);
+      if (COMPARE(val_ptr, val) == 0)
+          return idx;
+#endif
+    }
+  else if (hash == 0)
+    {
+      return 0;
+    }
+
+  /* Second hash function as suggested in [Knuth].  */
+  HASHTYPE second_hash = 1 + hval % (htab->size - 2);
+
+  for(;;)
+    {
+      if (idx <= second_hash)
+          idx = htab->size + idx - second_hash;
+      else
+          idx -= second_hash;
+
+      hash = atomic_load_explicit(&htab->table[idx].hashval,
+                                  memory_order_acquire);
+      if (hash == hval)
+        {
+#if COMPARE == 0
+          return idx;
+#else
+ val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr,
+                                                memory_order_acquire);
+          if (COMPARE(val_ptr, val) == 0)
+              return idx;
+#endif
+        }
+      else if (hash == 0)
+        {
+          return 0;
+        }
+    }
+}
+
+static int
+insert_helper (NAME *htab, HASHTYPE hval, TYPE val)
+{
+ /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */
+  size_t idx = 1 + (hval < htab->size ? hval : hval % htab->size);
+
+  TYPE val_ptr;
+  HASHTYPE hash;
+
+  hash = atomic_load_explicit(&htab->table[idx].hashval,
+                              memory_order_acquire);
+  if (hash == hval)
+    {
+      val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr,
+                                            memory_order_acquire);
+      if (COMPARE(val_ptr, val) != 0)
+          return -1;
+    }
+  else if (hash == 0)
+    {
+      val_ptr = NULL;
+ atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr,
+                                              (uintptr_t *) &val_ptr,
+                                              (uintptr_t) val,
+                                              memory_order_acquire,
+                                              memory_order_acquire);
+
+      if (val_ptr == NULL)
+        {
+          atomic_store_explicit(&htab->table[idx].hashval, hval,
+                                memory_order_release);
+          return 0;
+        }
+      else
+        {
+          do
+            {
+              hash = atomic_load_explicit(&htab->table[idx].val_ptr,
+                                          memory_order_acquire);
+            }
+          while (hash == 0);
+        }
+    }
+
+  /* Second hash function as suggested in [Knuth].  */
+  HASHTYPE second_hash = 1 + hval % (htab->size - 2);
+
+  for(;;)
+    {
+      if (idx <= second_hash)
+          idx = htab->size + idx - second_hash;
+      else
+          idx -= second_hash;
+
+      hash = atomic_load_explicit(&htab->table[idx].hashval,
+                                  memory_order_acquire);
+      if (hash == hval)
+        {
+ val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr,
+                                                memory_order_acquire);
+          if (COMPARE(val_ptr, val) != 0)
+              return -1;
+        }
+      else if (hash == 0)
+        {
+          val_ptr = NULL;
+ atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr, + (uintptr_t *) &val_ptr,
+                                                  (uintptr_t) val,
+                                                  memory_order_acquire,
+ memory_order_acquire);
+
+          if (val_ptr == NULL)
+            {
+              atomic_store_explicit(&htab->table[idx].hashval, hval,
+                                    memory_order_release);
+              return 0;
+            }
+          else
+            {
+              do
+                {
+ hash = atomic_load_explicit(&htab->table[idx].val_ptr,
+                                              memory_order_acquire);
+                }
+              while (hash == 0);
+            }
+        }
+    }
+}
+
+#define NO_RESIZING 0u
+#define ALLOCATING_MEMORY 1u
+#define MOVING_DATA 3u
+#define CLEANING 2u
+
+#define STATE_BITS 2u
+#define STATE_INCREMENT (1u << STATE_BITS)
+#define STATE_MASK (STATE_INCREMENT - 1)
+#define GET_STATE(A) ((A) & STATE_MASK)
+
+#define IS_NO_RESIZE_OR_CLEANING(A) (((A) & 0x1u) == 0)
+
+#define GET_ACTIVE_WORKERS(A) ((A) >> STATE_BITS)
+
+#define INITIALIZATION_BLOCK_SIZE 256
+#define MOVE_BLOCK_SIZE 256
+#define CEIL(A, B) (((A) + (B) - 1) / (B))
+
+/* Initializes records and copies the data from the old table.
+   It can share work with other threads */
+static void resize_helper(NAME *htab, int blocking)
+{
+  size_t num_old_blocks = CEIL(htab->old_size, MOVE_BLOCK_SIZE);
+  size_t num_new_blocks = CEIL(htab->size, INITIALIZATION_BLOCK_SIZE);
+
+  size_t my_block;
+  size_t num_finished_blocks = 0;
+
+ while ((my_block = atomic_fetch_add_explicit(&htab->next_init_block, 1,
+                                                memory_order_acquire))
+                                                    < num_new_blocks)
+    {
+      size_t record_it = my_block * INITIALIZATION_BLOCK_SIZE;
+      size_t record_end = (my_block + 1) * INITIALIZATION_BLOCK_SIZE;
+      if (record_end > htab->size)
+          record_end = htab->size;
+
+      while (record_it++ != record_end)
+        {
+ atomic_init(&htab->table[record_it].hashval, (uintptr_t) NULL); + atomic_init(&htab->table[record_it].val_ptr, (uintptr_t) NULL);
+        }
+
+      num_finished_blocks++;
+    }
+
+  atomic_fetch_add_explicit(&htab->num_initialized_blocks,
+                            num_finished_blocks, memory_order_release);
+  while (atomic_load_explicit(&htab->num_initialized_blocks,
+                              memory_order_acquire) != num_new_blocks);
+
+  /* All block are initialized, start moving */
+  num_finished_blocks = 0;
+ while ((my_block = atomic_fetch_add_explicit(&htab->next_move_block, 1,
+                                                memory_order_acquire))
+                                                    < num_old_blocks)
+    {
+      size_t record_it = my_block * MOVE_BLOCK_SIZE;
+      size_t record_end = (my_block + 1) * MOVE_BLOCK_SIZE;
+      if (record_end > htab->old_size)
+          record_end = htab->old_size;
+
+      while (record_it++ != record_end)
+        {
+          TYPE val_ptr = (TYPE) atomic_load_explicit(
+              &htab->old_table[record_it].val_ptr,
+              memory_order_acquire);
+          if (val_ptr == NULL)
+              continue;
+
+          HASHTYPE hashval = atomic_load_explicit(
+              &htab->old_table[record_it].hashval,
+              memory_order_acquire);
+          assert(hashval);
+
+          insert_helper(htab, hashval, val_ptr);
+        }
+
+      num_finished_blocks++;
+    }
+
+ atomic_fetch_add_explicit(&htab->num_moved_blocks, num_finished_blocks,
+                            memory_order_release);
+
+  if (blocking)
+      while (atomic_load_explicit(&htab->num_moved_blocks,
+ memory_order_acquire) != num_old_blocks);
+}
+
+static void
+resize_master(NAME *htab)
+{
+  htab->old_size = htab->size;
+  htab->old_table = htab->table;
+
+  htab->size = next_prime(htab->size * 2);
+  htab->table = malloc((1 + htab->size) * sizeof(htab->table[0]));
+  assert(htab->table);
+
+  /* Change state from ALLOCATING_MEMORY to MOVING_DATA */
+  atomic_fetch_xor_explicit(&htab->resizing_state,
+                            ALLOCATING_MEMORY ^ MOVING_DATA,
+                            memory_order_release);
+
+  resize_helper(htab, 1);
+
+  /* Change state from MOVING_DATA to CLEANING */
+ size_t resize_state = atomic_fetch_xor_explicit(&htab->resizing_state, + MOVING_DATA ^ CLEANING, + memory_order_acq_rel);
+  while (GET_ACTIVE_WORKERS(resize_state) != 0)
+      resize_state = atomic_load_explicit(&htab->resizing_state,
+                                          memory_order_acquire);
+
+  /* There are no more active workers */
+ atomic_store_explicit(&htab->next_init_block, 0, memory_order_relaxed);
+  atomic_store_explicit(&htab->num_initialized_blocks, 0,
+                        memory_order_relaxed);
+
+ atomic_store_explicit(&htab->next_move_block, 0, memory_order_relaxed); + atomic_store_explicit(&htab->num_moved_blocks, 0, memory_order_relaxed);
+
+  free(htab->old_table);
+
+  /* Change state to NO_RESIZING */
+ atomic_fetch_xor_explicit(&htab->resizing_state, CLEANING ^ NO_RESIZING,
+                            memory_order_relaxed);
+
+}
+
+static void
+resize_worker(NAME *htab)
+{
+  size_t resize_state = atomic_load_explicit(&htab->resizing_state,
+                                              memory_order_acquire);
+
+  /* If the resize has finished */
+  if (IS_NO_RESIZE_OR_CLEANING(resize_state))
+      return;
+
+ /* Register as worker and check if the resize has finished in the meantime*/
+  resize_state = atomic_fetch_add_explicit(&htab->resizing_state,
+                                            STATE_INCREMENT,
+                                            memory_order_acquire);
+  if (IS_NO_RESIZE_OR_CLEANING(resize_state))
+    {
+      atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT,
+                                memory_order_relaxed);
+      return;
+    }
+
+  /* Wait while the new table is being allocated. */
+  while (GET_STATE(resize_state) == ALLOCATING_MEMORY)
+      resize_state = atomic_load_explicit(&htab->resizing_state,
+                                          memory_order_acquire);
+
+  /* Check if the resize is done */
+  assert(GET_STATE(resize_state) != NO_RESIZING);
+  if (GET_STATE(resize_state) == CLEANING)
+    {
+      atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT,
+                                memory_order_relaxed);
+      return;
+    }
+
+  resize_helper(htab, 0);
+
+  /* Deregister worker */
+  atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT,
+                            memory_order_release);
+}
+
+
+int
+#define INIT(name) _INIT (name)
+#define _INIT(name) \
+  name##_init
+INIT(NAME) (NAME *htab, size_t init_size)
+{
+  /* We need the size to be a prime.  */
+  init_size = next_prime (init_size);
+
+  /* Initialize the data structure.  */
+  htab->size = init_size;
+  atomic_init(&htab->filled, 0);
+  atomic_init(&htab->resizing_state, 0);
+
+  atomic_init(&htab->next_init_block, 0);
+  atomic_init(&htab->num_initialized_blocks, 0);
+
+  atomic_init(&htab->next_move_block, 0);
+  atomic_init(&htab->num_moved_blocks, 0);
+
+  pthread_rwlock_init(&htab->resize_rwl, NULL);
+
+ htab->table = (void *) malloc ((init_size + 1) * sizeof (htab->table[0]));
+  if (htab->table == NULL)
+      return -1;
+
+  for (size_t i = 0; i <= init_size; i++)
+    {
+      atomic_init(&htab->table[i].hashval, (uintptr_t) NULL);
+      atomic_init(&htab->table[i].val_ptr, (uintptr_t) NULL);
+    }
+
+  return 0;
+}
+
+
+int
+#define FREE(name) _FREE (name)
+#define _FREE(name) \
+name##_free
+FREE(NAME) (NAME *htab)
+{
+  pthread_rwlock_destroy(&htab->resize_rwl);
+  free (htab->table);
+  return 0;
+}
+
+
+int
+#define INSERT(name) _INSERT (name)
+#define _INSERT(name) \
+name##_insert
+INSERT(NAME) (NAME *htab, HASHTYPE hval, TYPE data)
+{
+  int incremented = 0;
+
+  for(;;)
+    {
+      while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0)
+          resize_worker(htab);
+
+      size_t filled;
+      if (!incremented)
+        {
+          filled = atomic_fetch_add_explicit(&htab->filled, 1,
+                                              memory_order_acquire);
+          incremented = 1;
+        }
+      else
+        {
+          filled = atomic_load_explicit(&htab->filled,
+                                        memory_order_acquire);
+        }
+
+
+      if (100 * filled > 90 * htab->size)
+        {
+          /* Table is filled more than 90%.  Resize the table.  */
+
+ size_t resizing_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire);
+          if (resizing_state == 0 &&
+ atomic_compare_exchange_strong_explicit(&htab->resizing_state,
+                                                      &resizing_state,
+ ALLOCATING_MEMORY, + memory_order_acquire, + memory_order_acquire))
+            {
+              /* Master thread */
+              pthread_rwlock_unlock(&htab->resize_rwl);
+
+              pthread_rwlock_wrlock(&htab->resize_rwl);
+              resize_master(htab);
+              pthread_rwlock_unlock(&htab->resize_rwl);
+
+            }
+          else
+            {
+              /* Worker thread */
+              pthread_rwlock_unlock(&htab->resize_rwl);
+              resize_worker(htab);
+            }
+        }
+      else
+        {
+          /* Lock acquired, no need for resize*/
+          break;
+        }
+    }
+
+  int ret_val = insert_helper(htab, hval, data);
+  if (ret_val == -1)
+ atomic_fetch_sub_explicit(&htab->filled, 1, memory_order_relaxed);
+  pthread_rwlock_unlock(&htab->resize_rwl);
+  return ret_val;
+}
+
+
+
+TYPE
+#define FIND(name) _FIND (name)
+#define _FIND(name) \
+  name##_find
+FIND(NAME) (NAME *htab, HASHTYPE hval, TYPE val)
+{
+  while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0)
+      resize_worker(htab);
+
+  size_t idx;
+
+  /* Make the hash data nonzero.  */
+  hval = hval ?: 1;
+  idx = lookup(htab, hval, val);
+
+  if (idx == 0)
+    {
+      pthread_rwlock_unlock(&htab->resize_rwl);
+      return NULL;
+    }
+
+  /* get a copy before unlocking the lock */
+  TYPE ret_val = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr,
+                                             memory_order_relaxed);
+
+  pthread_rwlock_unlock(&htab->resize_rwl);
+  return ret_val;
+}
+
diff --git a/lib/dynamicsizehash_concurrent.h b/lib/dynamicsizehash_concurrent.h
new file mode 100644
index 00000000..eb06ac9e
--- /dev/null
+++ b/lib/dynamicsizehash_concurrent.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2000-2019 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Srdan Milakovic <sm...@rice.edu>, 2019.
+   Derived from Ulrich Drepper <drep...@redhat.com>, 2000.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+#include <pthread.h>
+#include "stdatomic.h"
+/* Before including this file the following macros must be defined:
+
+   NAME      name of the hash table structure.
+   TYPE      data type of the hash table entries
+
+   The following macros if present select features:
+
+   ITERATE   iterating over the table entries is possible
+   HASHTYPE  integer type for hash values, default unsigned long int
+ */
+
+
+
+#ifndef HASHTYPE
+# define HASHTYPE unsigned long int
+#endif
+
+#ifndef RESIZE_BLOCK_SIZE
+# define RESIZE_BLOCK_SIZE 256
+#endif
+
+/* Defined separately.  */
+extern size_t next_prime (size_t seed);
+
+
+/* Table entry type.  */
+#define _DYNHASHCONENTTYPE(name)       \
+  typedef struct name##_ent         \
+  {                                 \
+    _Atomic(HASHTYPE) hashval;      \
+    atomic_uintptr_t val_ptr;       \
+  } name##_ent
+#define DYNHASHENTTYPE(name) _DYNHASHCONENTTYPE (name)
+DYNHASHENTTYPE (NAME);
+
+/* Type of the dynamic hash table data structure.  */
+#define _DYNHASHCONTYPE(name) \
+typedef struct                                     \
+{                                                  \
+  size_t size;                                     \
+  size_t old_size;                                 \
+  atomic_size_t filled;                            \
+  name##_ent *table;                               \
+  name##_ent *old_table;                           \
+  atomic_size_t resizing_state;                    \
+  atomic_size_t next_init_block;                   \
+  atomic_size_t num_initialized_blocks;            \
+  atomic_size_t next_move_block;                   \
+  atomic_size_t num_moved_blocks;                  \
+  pthread_rwlock_t resize_rwl;                     \
+} name
+#define DYNHASHTYPE(name) _DYNHASHCONTYPE (name)
+DYNHASHTYPE (NAME);
+
+
+
+#define _FUNCTIONS(name)                                            \
+/* Initialize the hash table.  */                                   \
+extern int name##_init (name *htab, size_t init_size);              \
+                                                                    \
+/* Free resources allocated for hash table.  */                     \
+extern int name##_free (name *htab);                                \
+                                                                    \
+/* Insert new entry.  */                                            \
+extern int name##_insert (name *htab, HASHTYPE hval, TYPE data);    \
+                                                                    \
+/* Find entry in hash table.  */                                    \
+extern TYPE name##_find (name *htab, HASHTYPE hval, TYPE val);
+#define FUNCTIONS(name) _FUNCTIONS (name)
+FUNCTIONS (NAME)
+
+
+#ifndef NO_UNDEF
+# undef DYNHASHENTTYPE
+# undef DYNHASHTYPE
+# undef FUNCTIONS
+# undef _FUNCTIONS
+# undef XFUNCTIONS
+# undef _XFUNCTIONS
+# undef NAME
+# undef TYPE
+# undef ITERATE
+# undef COMPARE
+# undef FIRST
+# undef NEXT
+#endif
diff --git a/lib/stdatomic.h b/lib/stdatomic.h
new file mode 100644
index 00000000..49626662
--- /dev/null
+++ b/lib/stdatomic.h
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2011 Ed Schouten <e...@freebsd.org>
+ *                    David Chisnall <thera...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _STDATOMIC_H_
+#define        _STDATOMIC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if !defined(__has_builtin)
+#define __has_builtin(x) 0
+#endif
+#if !defined(__GNUC_PREREQ__)
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define __GNUC_PREREQ__(maj, min)                                      \
+       ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+#define __GNUC_PREREQ__(maj, min) 0
+#endif
+#endif
+
+#if !defined(__CLANG_ATOMICS) && !defined(__GNUC_ATOMICS)
+#if __has_feature(c_atomic)
+#define        __CLANG_ATOMICS
+#elif __GNUC_PREREQ__(4, 7)
+#define        __GNUC_ATOMICS
+#elif !defined(__GNUC__)
+#error "stdatomic.h does not support your compiler"
+#endif
+#endif
+
+/*
+ * language independent type to represent a Boolean value
+ */
+
+typedef int __Bool;
+
+/*
+ * 7.17.1 Atomic lock-free macros.
+ */
+
+#ifdef __GCC_ATOMIC_BOOL_LOCK_FREE
+#define        ATOMIC_BOOL_LOCK_FREE           __GCC_ATOMIC_BOOL_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR_LOCK_FREE
+#define        ATOMIC_CHAR_LOCK_FREE           __GCC_ATOMIC_CHAR_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define        ATOMIC_CHAR16_T_LOCK_FREE       __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define        ATOMIC_CHAR32_T_LOCK_FREE       __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define        ATOMIC_WCHAR_T_LOCK_FREE        __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_SHORT_LOCK_FREE
+#define        ATOMIC_SHORT_LOCK_FREE          __GCC_ATOMIC_SHORT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_INT_LOCK_FREE
+#define        ATOMIC_INT_LOCK_FREE            __GCC_ATOMIC_INT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LONG_LOCK_FREE
+#define        ATOMIC_LONG_LOCK_FREE           __GCC_ATOMIC_LONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LLONG_LOCK_FREE
+#define        ATOMIC_LLONG_LOCK_FREE          __GCC_ATOMIC_LLONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_POINTER_LOCK_FREE
+#define        ATOMIC_POINTER_LOCK_FREE        __GCC_ATOMIC_POINTER_LOCK_FREE
+#endif
+
+#if !defined(__CLANG_ATOMICS)
+#define        _Atomic(T)                      struct { volatile __typeof__(T) 
__val; }
+#endif
+
+/*
+ * 7.17.2 Initialization.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define        ATOMIC_VAR_INIT(value)          (value)
+#define        atomic_init(obj, value)         __c11_atomic_init(obj, value)
+#else
+#define        ATOMIC_VAR_INIT(value)          { .__val = (value) }
+#define        atomic_init(obj, value)         ((void)((obj)->__val = (value)))
+#endif
+
+/*
+ * Clang and recent GCC both provide predefined macros for the memory
+ * orderings. If we are using a compiler that doesn't define them, use the
+ * clang values - these will be ignored in the fallback path.
+ */
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED               0
+#endif
+#ifndef __ATOMIC_CONSUME
+#define __ATOMIC_CONSUME               1
+#endif
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE               2
+#endif
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE               3
+#endif
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL               4
+#endif
+#ifndef __ATOMIC_SEQ_CST
+#define __ATOMIC_SEQ_CST               5
+#endif
+
+/*
+ * 7.17.3 Order and consistency.
+ *
+ * The memory_order_* constants that denote the barrier behaviour of the
+ * atomic operations.
+ */
+
+typedef enum {
+    memory_order_relaxed = __ATOMIC_RELAXED,
+    memory_order_consume = __ATOMIC_CONSUME,
+    memory_order_acquire = __ATOMIC_ACQUIRE,
+    memory_order_release = __ATOMIC_RELEASE,
+    memory_order_acq_rel = __ATOMIC_ACQ_REL,
+    memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+/*
+ * 7.17.4 Fences.
+ */
+
+//#define __unused
+
+//static __inline void
+//atomic_thread_fence(memory_order __order __unused)
+//{
+//
+//#ifdef __CLANG_ATOMICS
+//    __c11_atomic_thread_fence(__order);
+//#elif defined(__GNUC_ATOMICS)
+//    __atomic_thread_fence(__order);
+//#else
+//    __sync_synchronize();
+//#endif
+//}
+//
+//static __inline void
+//atomic_signal_fence(memory_order __order __unused)
+//{
+//
+//#ifdef __CLANG_ATOMICS
+//    __c11_atomic_signal_fence(__order);
+//#elif defined(__GNUC_ATOMICS)
+//    __atomic_signal_fence(__order);
+//#else
+//    __asm volatile ("" ::: "memory");
+//#endif
+//}
+
+//#undef __unused
+
+/*
+ * 7.17.5 Lock-free property.
+ */
+
+#if defined(_KERNEL)
+/* Atomics in kernelspace are always lock-free. */
+#define        atomic_is_lock_free(obj) \
+       ((void)(obj), (__Bool)1)
+#elif defined(__CLANG_ATOMICS)
+#define        atomic_is_lock_free(obj) \
+       __atomic_is_lock_free(sizeof(*(obj)), obj)
+#elif defined(__GNUC_ATOMICS)
+#define        atomic_is_lock_free(obj) \
+       __atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val)
+#else
+#define        atomic_is_lock_free(obj) \
+       ((void)(obj), sizeof((obj)->__val) <= sizeof(void *))
+#endif
+
+/*
+ * 7.17.6 Atomic integer types.
+ */
+
+typedef _Atomic(__Bool)                        atomic_bool;
+typedef _Atomic(char)                  atomic_char;
+typedef _Atomic(signed char)           atomic_schar;
+typedef _Atomic(unsigned char)         atomic_uchar;
+typedef _Atomic(short)                 atomic_short;
+typedef _Atomic(unsigned short)                atomic_ushort;
+typedef _Atomic(int)                   atomic_int;
+typedef _Atomic(unsigned int)          atomic_uint;
+typedef _Atomic(long)                  atomic_long;
+typedef _Atomic(unsigned long)         atomic_ulong;
+typedef _Atomic(long long)             atomic_llong;
+typedef _Atomic(unsigned long long)    atomic_ullong;
+#if 0
+typedef _Atomic(char16_t)              atomic_char16_t;
+typedef _Atomic(char32_t)              atomic_char32_t;
+#endif
+typedef _Atomic(wchar_t)               atomic_wchar_t;
+typedef _Atomic(int_least8_t)          atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)         atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)         atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)                atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)         atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)                atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)         atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)                atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)           atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)          atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)          atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)         atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)          atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)         atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)          atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)         atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)              atomic_intptr_t;
+typedef _Atomic(uintptr_t)             atomic_uintptr_t;
+typedef _Atomic(size_t)                        atomic_size_t;
+typedef _Atomic(ptrdiff_t)             atomic_ptrdiff_t;
+typedef _Atomic(intmax_t)              atomic_intmax_t;
+typedef _Atomic(uintmax_t)             atomic_uintmax_t;
+
+/*
+ * 7.17.7 Operations on atomic types.
+ */
+
+/*
+ * Compiler-specific operations.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define        atomic_compare_exchange_strong_explicit(object, expected,       
\
+    desired, success, failure)                                         \
+       __c11_atomic_compare_exchange_strong(object, expected, desired, \
+           success, failure)
+#define        atomic_compare_exchange_weak_explicit(object, expected,         
\
+    desired, success, failure)                                         \
+       __c11_atomic_compare_exchange_weak(object, expected, desired,   \
+           success, failure)
+#define        atomic_exchange_explicit(object, desired, order)                
\
+       __c11_atomic_exchange(object, desired, order)
+#define        atomic_fetch_add_explicit(object, operand, order)               
\
+       __c11_atomic_fetch_add(object, operand, order)
+#define        atomic_fetch_and_explicit(object, operand, order)               
\
+       __c11_atomic_fetch_and(object, operand, order)
+#define        atomic_fetch_or_explicit(object, operand, order)                
\
+       __c11_atomic_fetch_or(object, operand, order)
+#define        atomic_fetch_sub_explicit(object, operand, order)               
\
+       __c11_atomic_fetch_sub(object, operand, order)
+#define        atomic_fetch_xor_explicit(object, operand, order)               
\
+       __c11_atomic_fetch_xor(object, operand, order)
+#define        atomic_load_explicit(object, order)                             
\
+       __c11_atomic_load(object, order)
+#define        atomic_store_explicit(object, desired, order)                   
\
+       __c11_atomic_store(object, desired, order)
+#elif defined(__GNUC_ATOMICS)
+#define        atomic_compare_exchange_strong_explicit(object, expected,       
\
+    desired, success, failure)                                         \
+       __atomic_compare_exchange_n(&(object)->__val, expected,          \
+           desired, 0, success, failure)
+#define        atomic_compare_exchange_weak_explicit(object, expected,         
\
+    desired, success, failure)                                         \
+       __atomic_compare_exchange_n(&(object)->__val, expected,          \
+           desired, 1, success, failure)
+#define        atomic_exchange_explicit(object, desired, order)                
\
+       __atomic_exchange_n(&(object)->__val, desired, order)
+#define        atomic_fetch_add_explicit(object, operand, order)               
\
+       __atomic_fetch_add(&(object)->__val, operand, order)
+#define        atomic_fetch_and_explicit(object, operand, order)               
\
+       __atomic_fetch_and(&(object)->__val, operand, order)
+#define        atomic_fetch_or_explicit(object, operand, order)                
\
+       __atomic_fetch_or(&(object)->__val, operand, order)
+#define        atomic_fetch_sub_explicit(object, operand, order)               
\
+       __atomic_fetch_sub(&(object)->__val, operand, order)
+#define        atomic_fetch_xor_explicit(object, operand, order)               
\
+       __atomic_fetch_xor(&(object)->__val, operand, order)
+#define        atomic_load_explicit(object, order)                             
\
+       __atomic_load_n(&(object)->__val, order)
+#define        atomic_store_explicit(object, desired, order)                   
\
+       __atomic_store_n(&(object)->__val, desired, order)
+#else
+#define        __atomic_apply_stride(object, operand) \
+       (((__typeof__((object)->__val))0) + (operand))
+#define        atomic_compare_exchange_strong_explicit(object, expected,       
\
+    desired, success, failure) __extension__ ({                        \
+       __typeof__(expected) __ep = (expected);                         \
+       __typeof__(*__ep) __e = *__ep;                                  \
+       (void)(success); (void)(failure);                               \
+       (__Bool)((*__ep = __sync_val_compare_and_swap(&(object)->__val,  \
+           __e, desired)) == __e);                                     \
+})
+#define        atomic_compare_exchange_weak_explicit(object, expected,         
\
+    desired, success, failure)                                         \
+       atomic_compare_exchange_strong_explicit(object, expected,       \
+               desired, success, failure)
+#if __has_builtin(__sync_swap)
+/* Clang provides a full-barrier atomic exchange - use it if available. */
+#define        atomic_exchange_explicit(object, desired, order)                
\
+       ((void)(order), __sync_swap(&(object)->__val, desired))
+#else
+/*
+ * __sync_lock_test_and_set() is only an acquire barrier in theory (although in + * practice it is usually a full barrier) so we need an explicit barrier before
+ * it.
+ */
+#define        atomic_exchange_explicit(object, desired, order)                
\
+__extension__ ({                                                       \
+       __typeof__(object) __o = (object);                              \
+       __typeof__(desired) __d = (desired);                            \
+       (void)(order);                                                  \
+       __sync_synchronize();                                           \
+       __sync_lock_test_and_set(&(__o)->__val, __d);                    \
+})
+#endif
+#define        atomic_fetch_add_explicit(object, operand, order)               
\
+       ((void)(order), __sync_fetch_and_add(&(object)->__val,           \
+           __atomic_apply_stride(object, operand)))
+#define        atomic_fetch_and_explicit(object, operand, order)               
\
+       ((void)(order), __sync_fetch_and_and(&(object)->__val, operand))
+#define        atomic_fetch_or_explicit(object, operand, order)                
\
+       ((void)(order), __sync_fetch_and_or(&(object)->__val, operand))
+#define        atomic_fetch_sub_explicit(object, operand, order)               
\
+       ((void)(order), __sync_fetch_and_sub(&(object)->__val,           \
+           __atomic_apply_stride(object, operand)))
+#define        atomic_fetch_xor_explicit(object, operand, order)               
\
+       ((void)(order), __sync_fetch_and_xor(&(object)->__val, operand))
+#define        atomic_load_explicit(object, order)                             
\
+       ((void)(order), __sync_fetch_and_add(&(object)->__val, 0))
+#define        atomic_store_explicit(object, desired, order)                   
\
+       ((void)atomic_exchange_explicit(object, desired, order))
+#endif
+
+/*
+ * Convenience functions.
+ *
+ * Don't provide these in kernel space. In kernel space, we should be
+ * disciplined enough to always provide explicit barriers.
+ */
+
+#ifndef _KERNEL
+#define        atomic_compare_exchange_strong(object, expected, desired)       
\
+       atomic_compare_exchange_strong_explicit(object, expected,       \
+           desired, memory_order_seq_cst, memory_order_seq_cst)
+#define        atomic_compare_exchange_weak(object, expected, desired)         
\
+       atomic_compare_exchange_weak_explicit(object, expected,         \
+           desired, memory_order_seq_cst, memory_order_seq_cst)
+#define        atomic_exchange(object, desired)                                
\
+       atomic_exchange_explicit(object, desired, memory_order_seq_cst)
+#define        atomic_fetch_add(object, operand)                               
\
+       atomic_fetch_add_explicit(object, operand, memory_order_seq_cst)
+#define        atomic_fetch_and(object, operand)                               
\
+       atomic_fetch_and_explicit(object, operand, memory_order_seq_cst)
+#define        atomic_fetch_or(object, operand)                                
\
+       atomic_fetch_or_explicit(object, operand, memory_order_seq_cst)
+#define        atomic_fetch_sub(object, operand)                               
\
+       atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst)
+#define        atomic_fetch_xor(object, operand)                               
\
+       atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst)
+#define        atomic_load(object)                                             
\
+       atomic_load_explicit(object, memory_order_seq_cst)
+#define        atomic_store(object, desired)                                   
\
+       atomic_store_explicit(object, desired, memory_order_seq_cst)
+#endif /* !_KERNEL */
+
+/*
+ * 7.17.8 Atomic flag type and operations.
+ *
+ * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some
+ * kind of compiler built-in type we could use?
+ */
+
+typedef struct {
+    atomic_bool        __flag;
+} atomic_flag;
+
+#define        ATOMIC_FLAG_INIT                { ATOMIC_VAR_INIT(0) }
+
+static __inline __Bool
+atomic_flag_test_and_set_explicit(volatile atomic_flag *__object,
+                                  memory_order __order)
+{
+    return (atomic_exchange_explicit(&__object->__flag, 1, __order));
+}
+
+static __inline void
+atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order)
+{
+
+    atomic_store_explicit(&__object->__flag, 0, __order);
+}
+
+#ifndef _KERNEL
+static __inline __Bool
+atomic_flag_test_and_set(volatile atomic_flag *__object)
+{
+
+    return (atomic_flag_test_and_set_explicit(__object,
+                                              memory_order_seq_cst));
+}
+
+static __inline void
+atomic_flag_clear(volatile atomic_flag *__object)
+{
+
+    atomic_flag_clear_explicit(__object, memory_order_seq_cst);
+}
+#endif /* !_KERNEL */
+
+#endif /* !_STDATOMIC_H_ */
\ No newline at end of file
diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index bf1f4857..87abf7a7 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,12 @@
+2019-08-15  Jonathon Anderson  <jm...@rice.edu>
+
+       * libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator.
+       * libdwP.h (Dwarf): Likewise.
+       * dwarf_begin_elf.c (dwarf_begin_elf): Support for above.
+       * dwarf_end.c (dwarf_end): Likewise.
+       * dwarf_abbrev_hash.{c,h}: Use the *_concurrent hash table.
+       * Makefile.am: Link -lpthread to provide rwlocks.
+
2019-08-12  Mark Wielaard  <m...@klomp.org>

        * libdw.map (ELFUTILS_0.177): Add new version of dwelf_elf_begin.
diff --git a/libdw/Makefile.am b/libdw/Makefile.am
index 7a3d5322..6d0a0187 100644
--- a/libdw/Makefile.am
+++ b/libdw/Makefile.am
@@ -108,7 +108,7 @@ am_libdw_pic_a_OBJECTS = $(libdw_a_SOURCES:.c=.os)
libdw_so_LIBS = libdw_pic.a ../libdwelf/libdwelf_pic.a \
          ../libdwfl/libdwfl_pic.a ../libebl/libebl.a
libdw_so_DEPS = ../lib/libeu.a ../libelf/libelf.so
-libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS)
+libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) -lpthread
libdw_so_SOURCES =
libdw.so$(EXEEXT): $(srcdir)/libdw.map $(libdw_so_LIBS) $(libdw_so_DEPS)
# The rpath is necessary for libebl because its $ORIGIN use will
diff --git a/libdw/dwarf_abbrev_hash.c b/libdw/dwarf_abbrev_hash.c
index f52f5ad5..c2548140 100644
--- a/libdw/dwarf_abbrev_hash.c
+++ b/libdw/dwarf_abbrev_hash.c
@@ -38,7 +38,7 @@
#define next_prime __libdwarf_next_prime
extern size_t next_prime (size_t) attribute_hidden;

-#include <dynamicsizehash.c>
+#include <dynamicsizehash_concurrent.c>

#undef next_prime
#define next_prime attribute_hidden __libdwarf_next_prime
diff --git a/libdw/dwarf_abbrev_hash.h b/libdw/dwarf_abbrev_hash.h
index d2f02ccc..bc3d62c7 100644
--- a/libdw/dwarf_abbrev_hash.h
+++ b/libdw/dwarf_abbrev_hash.h
@@ -34,6 +34,6 @@
#define TYPE Dwarf_Abbrev *
#define COMPARE(a, b) (0)

-#include <dynamicsizehash.h>
+#include <dynamicsizehash_concurrent.h>

#endif  /* dwarf_abbrev_hash.h */
diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
index 38c8f5c6..b3885bb5 100644
--- a/libdw/dwarf_begin_elf.c
+++ b/libdw/dwarf_begin_elf.c
@@ -417,11 +417,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp)
  /* Initialize the memory handling.  */
  result->mem_default_size = mem_default_size;
  result->oom_handler = __libdw_oom;
-  result->mem_tail = (struct libdw_memblock *) (result + 1);
-  result->mem_tail->size = (result->mem_default_size
-                           - offsetof (struct libdw_memblock, mem));
-  result->mem_tail->remaining = result->mem_tail->size;
-  result->mem_tail->prev = NULL;
+  pthread_rwlock_init(&result->mem_rwl, NULL);
+  result->mem_stacks = 1;
+  result->mem_tails = malloc (sizeof (struct libdw_memblock *));
+  result->mem_tails[0] = (struct libdw_memblock *) (result + 1);
+  result->mem_tails[0]->size = (result->mem_default_size
+                              - offsetof (struct libdw_memblock, mem));
+  result->mem_tails[0]->remaining = result->mem_tails[0]->size;
+  result->mem_tails[0]->prev = NULL;

  if (cmd == DWARF_C_READ || cmd == DWARF_C_RDWR)
    {
diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
index 29795c10..6317bcda 100644
--- a/libdw/dwarf_end.c
+++ b/libdw/dwarf_end.c
@@ -94,14 +94,22 @@ dwarf_end (Dwarf *dwarf)
      /* And the split Dwarf.  */
      tdestroy (dwarf->split_tree, noop_free);

-      struct libdw_memblock *memp = dwarf->mem_tail;
- /* The first block is allocated together with the Dwarf object. */
-      while (memp->prev != NULL)
-       {
-         struct libdw_memblock *prevp = memp->prev;
-         free (memp);
-         memp = prevp;
-       }
+      for (size_t i = 0; i < dwarf->mem_stacks; i++)
+        {
+          struct libdw_memblock *memp = dwarf->mem_tails[i];
+ /* The first block is allocated together with the Dwarf object. */
+          while (memp != NULL && memp->prev != NULL)
+           {
+             struct libdw_memblock *prevp = memp->prev;
+             free (memp);
+             memp = prevp;
+           }
+ /* Only for stack 0 though, the others are allocated individually. */
+          if (memp != NULL && i > 0)
+            free (memp);
+        }
+      free (dwarf->mem_tails);
+      pthread_rwlock_destroy (&dwarf->mem_rwl);

      /* Free the pubnames helper structure.  */
      free (dwarf->pubnames_sets);
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index eebb7d12..442d493d 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -31,6 +31,7 @@

#include <libintl.h>
#include <stdbool.h>
+#include <pthread.h>

#include <libdw.h>
#include <dwarf.h>
@@ -218,16 +219,18 @@ struct Dwarf
/* Similar for addrx/constx, which will come from .debug_addr section. */
  struct Dwarf_CU *fake_addr_cu;

-  /* Internal memory handling.  This is basically a simplified
+ /* Internal memory handling. This is basically a simplified thread-local
     reimplementation of obstacks.  Unfortunately the standard obstack
     implementation is not usable in libraries.  */
+  pthread_rwlock_t mem_rwl;
+  size_t mem_stacks;
  struct libdw_memblock
  {
    size_t size;
    size_t remaining;
    struct libdw_memblock *prev;
    char mem[0];
-  } *mem_tail;
+  } **mem_tails;

  /* Default size of allocated memory blocks.  */
  size_t mem_default_size;
@@ -572,7 +575,7 @@ extern void __libdw_seterrno (int value) internal_function;

/* Memory handling, the easy parts. This macro does not do any locking. */
#define libdw_alloc(dbg, type, tsize, cnt) \
-  ({ struct libdw_memblock *_tail = (dbg)->mem_tail;                           
   \
+  ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg);                 \
     size_t _required = (tsize) * (cnt);                                      \
type *_result = (type *) (_tail->mem + (_tail->size - _tail->remaining));\
     size_t _padding = ((__alignof (type)                                     \
@@ -591,6 +594,10 @@ extern void __libdw_seterrno (int value) internal_function;
#define libdw_typed_alloc(dbg, type) \
  libdw_alloc (dbg, type, sizeof (type), 1)

+/* Callback to choose a thread-local memory allocation stack.  */
+extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg)
+     __nonnull_attribute__ (1);
+
/* Callback to allocate more.  */
extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
     __attribute__ ((__malloc__)) __nonnull_attribute__ (1);
diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c
index f1e08714..c3c5e8a7 100644
--- a/libdw/libdw_alloc.c
+++ b/libdw/libdw_alloc.c
@@ -33,9 +33,73 @@

#include <errno.h>
#include <stdlib.h>
+#include <assert.h>
#include "libdwP.h"
#include "system.h"
+#include "stdatomic.h"
+#if USE_VG_ANNOTATIONS == 1
+#include <helgrind.h>
+#include <drd.h>
+#else
+#define ANNOTATE_HAPPENS_BEFORE(X)
+#define ANNOTATE_HAPPENS_AFTER(X)
+#endif
+
+
+#define thread_local __thread

+static thread_local int initialized = 0;
+static thread_local size_t thread_id;
+static atomic_size_t next_id = ATOMIC_VAR_INIT(0);
+
+struct libdw_memblock *
+__libdw_alloc_tail (Dwarf *dbg)
+{
+  if (!initialized)
+    {
+      thread_id = atomic_fetch_add (&next_id, 1);
+      initialized = 1;
+    }
+
+  pthread_rwlock_rdlock (&dbg->mem_rwl);
+  if (thread_id >= dbg->mem_stacks)
+    {
+      pthread_rwlock_unlock (&dbg->mem_rwl);
+      pthread_rwlock_wrlock (&dbg->mem_rwl);
+
+ /* Another thread may have already reallocated. In theory using an + atomic would be faster, but given that this only happens once per
+         thread per Dwarf, some minor slowdown should be fine.  */
+      if (thread_id >= dbg->mem_stacks)
+        {
+          dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1)
+ * sizeof (struct libdw_memblock *));
+          assert(dbg->mem_tails);
+          for (size_t i = dbg->mem_stacks; i <= thread_id; i++)
+            dbg->mem_tails[i] = NULL;
+          dbg->mem_stacks = thread_id + 1;
+          ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails);
+        }
+
+      pthread_rwlock_unlock (&dbg->mem_rwl);
+      pthread_rwlock_rdlock (&dbg->mem_rwl);
+    }
+
+  /* At this point, we have an entry in the tail array.  */
+  ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails);
+  struct libdw_memblock *result = dbg->mem_tails[thread_id];
+  if (result == NULL)
+    {
+      result = malloc (dbg->mem_default_size);
+      result->size = dbg->mem_default_size
+                     - offsetof (struct libdw_memblock, mem);
+      result->remaining = result->size;
+      result->prev = NULL;
+      dbg->mem_tails[thread_id] = result;
+    }
+  pthread_rwlock_unlock (&dbg->mem_rwl);
+  return result;
+}

void *
__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
@@ -52,8 +116,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
  newp->size = size - offsetof (struct libdw_memblock, mem);
  newp->remaining = (uintptr_t) newp + size - (result + minsize);

-  newp->prev = dbg->mem_tail;
-  dbg->mem_tail = newp;
+  pthread_rwlock_rdlock (&dbg->mem_rwl);
+  newp->prev = dbg->mem_tails[thread_id];
+  dbg->mem_tails[thread_id] = newp;
+  pthread_rwlock_unlock (&dbg->mem_rwl);

  return (void *) result;
}
--
2.23.0.rc1



Reply via email to