Hi Aaron,

On Tue, 2025-04-15 at 17:04 -0400, Aaron Merey wrote:
> eu_tfind is used to facilitate lazy loading throughout libdw.
> If a result is not found via eu_tfind, work is done to load
> the result and cache it in an eu_search_tree.
> 
> Some calls to eu_tfind allow for TOCTOU bugs.  Multiple threads
> might race to call eu_tfind on some result that hasn't yet been
> cached.  Multiple threads may then attempt to load the result
> which may cause an unnecessary amount of memory may be allocated.
> Additionally this memory may not get released when the associated
> libdw data structure is freed.
> 
> Fix this by adding additional locking to ensure that only one
> thread performs lazy loading.
> 
> One approach used in this patch is to preserve calls to eu_tfind
> without additional locking, but when the result isn't found then
> a lock is then used to synchronize access to the lazy loading code.
> An extra eu_tfind call has been added at the start of these critical
> section to synchronize verification that lazy loading should proceed.
> 
> Another approach used is to simply synchronize entire calls to
> functions where lazy loading via eu_tfind might occur (__libdw_find_fde
> and __libdw_intern_expression).
> 
> libdw/
>       * cfi.h (struct Dwarf_CFI_s): Declare new mutex.
>       * dwarf_begin_elf.c (valid_p): Initialize all locks for fake CUs.
>       * dwarf_cfi_addrframe.c (dwarf_cfi_addrframe): Place lock around
>       __libdw_find_fde.
>       * dwarf_end.c (cu_free): Deallocate all locks unconditionally,
>       whether or not the CU is fake.
>       * dwarf_frame_cfa.c (dwarf_frame_cfa): Place lock around
>       __libdw_intern_expression.
>       * dwarf_frame_register.c (dwarf_frame_register): Ditto.
>       * dwarf_getcfi.c (dwarf_getcfi): Initialize cfi lock.
>       * dwarf_getlocation.c (is_constant_offset): Synchronize access
>       to lazy loading section.
>       (getlocation): Place lock around __libdw_intern_expression.
>       * dwarf_getmacros.c (cache_op_table): Synchronize access to lazy
>       loading section.
>       * frame-cache.c (__libdw_destroy_frame_cache): Free Dwarf_CFI
>       mutex.
>       * libdwP.h (struct Dwarf): Update macro_lock comment.
>       (struct Dwarf_CU): Declare new mutex.
>       libdw_findcu.c (__libdw_intern_next_unit): Initialize
>       intern_lock.
>       (__libdw_findcu): Adjust locking so that the first eu_tfind
>       can be done without extra lock overhead.
> 
> Signed-off-by: Aaron Merey <ame...@redhat.com>
> ---
>  libdw/cfi.h                  |  4 +++
>  libdw/dwarf_begin_elf.c      | 15 +++++++++
>  libdw/dwarf_cfi_addrframe.c  |  3 ++
>  libdw/dwarf_end.c            | 10 +++---
>  libdw/dwarf_frame_cfa.c      |  2 ++
>  libdw/dwarf_frame_register.c | 16 +++++----
>  libdw/dwarf_getcfi.c         |  1 +
>  libdw/dwarf_getlocation.c    | 63 ++++++++++++++++++++++--------------
>  libdw/dwarf_getmacros.c      | 16 ++++++++-
>  libdw/frame-cache.c          |  1 +
>  libdw/libdwP.h               |  6 +++-
>  libdw/libdw_findcu.c         |  9 ++++--
>  12 files changed, 108 insertions(+), 38 deletions(-)
> 
> diff --git a/libdw/cfi.h b/libdw/cfi.h
> index d0134243..48e42a41 100644
> --- a/libdw/cfi.h
> +++ b/libdw/cfi.h
> @@ -98,6 +98,10 @@ struct Dwarf_CFI_s
>    /* Search tree for parsed DWARF expressions, indexed by raw pointer.  */
>    search_tree expr_tree;
>  
> +  /* Should be held when calling __libdw_find_fde and when 
> __libdw_intern_expression
> +     is called with Dwarf_CFI members.  */
> +  mutex_define(, lock);
> +
>    /* Backend hook.  */
>    struct ebl *ebl;
> 

OK, used in dwarf_cfi_addrframe, dwarf_frame_cfa, dwarf_frame_register.
I needed to double check the use of __libdw_intern_expression in
libdw/dwarf_getlocation.c (getlocation) though. That one is also
properly "locked", but using the cu intern_lock instead. Which makes
sense. But maybe we can document that somewhere?

> diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
> index 58a309e9..63e2805d 100644
> --- a/libdw/dwarf_begin_elf.c
> +++ b/libdw/dwarf_begin_elf.c
> @@ -359,6 +359,11 @@ valid_p (Dwarf *result)
>         result->fake_loc_cu->version = 4;
>         result->fake_loc_cu->split = NULL;
>         eu_search_tree_init (&result->fake_loc_cu->locs_tree);
> +       rwlock_init (result->fake_loc_cu->abbrev_lock);
> +       rwlock_init (result->fake_loc_cu->split_lock);
> +       mutex_init (result->fake_loc_cu->src_lock);
> +       mutex_init (result->fake_loc_cu->str_off_base_lock);
> +       mutex_init (result->fake_loc_cu->intern_lock);
>       }
>      }
>  
> @@ -387,6 +392,11 @@ valid_p (Dwarf *result)
>         result->fake_loclists_cu->version = 5;
>         result->fake_loclists_cu->split = NULL;
>         eu_search_tree_init (&result->fake_loclists_cu->locs_tree);
> +       rwlock_init (result->fake_loclists_cu->abbrev_lock);
> +       rwlock_init (result->fake_loclists_cu->split_lock);
> +       mutex_init (result->fake_loclists_cu->src_lock);
> +       mutex_init (result->fake_loclists_cu->str_off_base_lock);
> +       mutex_init (result->fake_loclists_cu->intern_lock);
>       }
>      }
>  
> @@ -420,6 +430,11 @@ valid_p (Dwarf *result)
>         result->fake_addr_cu->version = 5;
>         result->fake_addr_cu->split = NULL;
>         eu_search_tree_init (&result->fake_addr_cu->locs_tree);
> +       rwlock_init (result->fake_addr_cu->abbrev_lock);
> +       rwlock_init (result->fake_addr_cu->split_lock);
> +       mutex_init (result->fake_addr_cu->src_lock);
> +       mutex_init (result->fake_addr_cu->str_off_base_lock);
> +       mutex_init (result->fake_addr_cu->intern_lock);
>       }
>      }

O, that is good. Missed completely that the "fake CUs" never had their
locks initialized. That must have caused some unexpected issues.
 
> diff --git a/libdw/dwarf_cfi_addrframe.c b/libdw/dwarf_cfi_addrframe.c
> index 44240279..f391f9f9 100644
> --- a/libdw/dwarf_cfi_addrframe.c
> +++ b/libdw/dwarf_cfi_addrframe.c
> @@ -39,7 +39,10 @@ dwarf_cfi_addrframe (Dwarf_CFI *cache, Dwarf_Addr address, 
> Dwarf_Frame **frame)
>    if (cache == NULL)
>      return -1;
>  
> +  mutex_lock (cache->lock);
>    struct dwarf_fde *fde = __libdw_find_fde (cache, address);
> +  mutex_unlock (cache->lock);
> +
>    if (fde == NULL)
>      return -1;

Ack. So to my surprise this seems to be the only use of libdw_find_fd.
Does this mean we can use the non-locked tfind and tsearch calls now
inside libdw_find_fd (instead of the eu_tfind and eu_tsearch variants)?

> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index 1628e448..979b1168 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -61,17 +61,19 @@ static void
>  cu_free (void *arg)
>  {
>    struct Dwarf_CU *p = (struct Dwarf_CU *) arg;
> +
>    eu_search_tree_fini (&p->locs_tree, noop_free);
> +  rwlock_fini (p->abbrev_lock);
> +  rwlock_fini (p->split_lock);
> +  mutex_fini (p->src_lock);
> +  mutex_fini (p->str_off_base_lock);
> +  mutex_fini (p->intern_lock);
>  
>    /* Only free the CU internals if its not a fake CU.  */
>    if (p != p->dbg->fake_loc_cu && p != p->dbg->fake_loclists_cu
>       && p != p->dbg->fake_addr_cu)
>      {
>        Dwarf_Abbrev_Hash_free (&p->abbrev_hash);
> -      rwlock_fini (p->abbrev_lock);
> -      rwlock_fini (p->split_lock);
> -      mutex_fini (p->src_lock);
> -      mutex_fini (p->str_off_base_lock);
>  
>        /* Free split dwarf one way (from skeleton to split).  */
>        if (p->unit_type == DW_UT_skeleton

OK, good to see this fake cu locking fixed.

> diff --git a/libdw/dwarf_frame_cfa.c b/libdw/dwarf_frame_cfa.c
> index 07f998cd..c18ee499 100644
> --- a/libdw/dwarf_frame_cfa.c
> +++ b/libdw/dwarf_frame_cfa.c
> @@ -57,11 +57,13 @@ dwarf_frame_cfa (Dwarf_Frame *fs, Dwarf_Op **ops, size_t 
> *nops)
>  
>      case cfa_expr:
>        /* Parse the expression into internal form.  */
> +      mutex_lock (fs->cache->lock);
>        result = __libdw_intern_expression
>       (NULL, fs->cache->other_byte_order,
>        fs->cache->e_ident[EI_CLASS] == ELFCLASS32 ? 4 : 8, 4,
>        &fs->cache->expr_tree, &fs->cfa_data.expr, false, false,
>        ops, nops, IDX_debug_frame);
> +      mutex_unlock (fs->cache->lock);
>        break;
>  
>      case cfa_invalid:
> diff --git a/libdw/dwarf_frame_register.c b/libdw/dwarf_frame_register.c
> index a6b7c4c1..dbd7f1b2 100644
> --- a/libdw/dwarf_frame_register.c
> +++ b/libdw/dwarf_frame_register.c
> @@ -109,12 +109,16 @@ dwarf_frame_register (Dwarf_Frame *fs, int regno, 
> Dwarf_Op ops_mem[3],
>       block.data = (void *) p;
>  
>       /* Parse the expression into internal form.  */
> -     if (__libdw_intern_expression (NULL,
> -                                    fs->cache->other_byte_order,
> -                                    address_size, 4,
> -                                    &fs->cache->expr_tree, &block,
> -                                    true, reg->rule == reg_val_expression,
> -                                    ops, nops, IDX_debug_frame) < 0)
> +     mutex_lock (fs->cache->lock);
> +     int res = __libdw_intern_expression (NULL,
> +                                          fs->cache->other_byte_order,
> +                                          address_size, 4,
> +                                          &fs->cache->expr_tree, &block,
> +                                          true, reg->rule == 
> reg_val_expression,
> +                                          ops, nops, IDX_debug_frame);
> +     mutex_unlock (fs->cache->lock);
> +
> +     if (res < 0)
>         return -1;
>       break;
>        }

Similar question for __libdw_intern_expression, since there can now not
be any concurrent calls to __libdw_intern_expression for the same cache
(or cu), can we now switch to the non-locked tfind and tsearch variants
inside __libdw_intern_expression?

> diff --git a/libdw/dwarf_getcfi.c b/libdw/dwarf_getcfi.c
> index a4497152..893e3c74 100644
> --- a/libdw/dwarf_getcfi.c
> +++ b/libdw/dwarf_getcfi.c
> @@ -70,6 +70,7 @@ dwarf_getcfi (Dwarf *dbg)
>        eu_search_tree_init (&cfi->cie_tree);
>        eu_search_tree_init (&cfi->fde_tree);
>        eu_search_tree_init (&cfi->expr_tree);
> +      mutex_init (cfi->lock);
>  
>        cfi->ebl = NULL;

Ack.

> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index ad1d46ca..7bf96716 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -216,27 +216,38 @@ is_constant_offset (Dwarf_Attribute *attr,
>  
>    if (found == NULL)
>      {
> -      Dwarf_Word offset;
> -      if (INTUSE(dwarf_formudata) (attr, &offset) != 0)
> -     return -1;
> +      mutex_lock (attr->cu->intern_lock);
> +
> +      found = eu_tfind (&fake, &attr->cu->locs_tree, loc_compare);
> +      if (found == NULL)
> +     {
> +       Dwarf_Word offset;
> +       if (INTUSE(dwarf_formudata) (attr, &offset) != 0)
> +         {
> +           mutex_unlock (attr->cu->intern_lock);
> +           return -1;
> +         }
>  
> -      Dwarf_Op *result = libdw_alloc (attr->cu->dbg,
> -                                   Dwarf_Op, sizeof (Dwarf_Op), 1);
> +       Dwarf_Op *result = libdw_alloc (attr->cu->dbg,
> +                                       Dwarf_Op, sizeof (Dwarf_Op), 1);
>  
> -      result->atom = DW_OP_plus_uconst;
> -      result->number = offset;
> -      result->number2 = 0;
> -      result->offset = 0;
> +       result->atom = DW_OP_plus_uconst;
> +       result->number = offset;
> +       result->number2 = 0;
> +       result->offset = 0;
>  
> -      /* Insert a record in the search tree so we can find it again later.  
> */
> -      struct loc_s *newp = libdw_alloc (attr->cu->dbg,
> -                                     struct loc_s, sizeof (struct loc_s),
> -                                     1);
> -      newp->addr = attr->valp;
> -      newp->loc = result;
> -      newp->nloc = 1;
> +       /* Insert a record in the search tree so we can find it again later.  
> */
> +       struct loc_s *newp = libdw_alloc (attr->cu->dbg,
> +                                         struct loc_s, sizeof (struct loc_s),
> +                                         1);
> +       newp->addr = attr->valp;
> +       newp->loc = result;
> +       newp->nloc = 1;
> +
> +       found = eu_tsearch (newp, &attr->cu->locs_tree, loc_compare);
> +     }
>  
> -      found = eu_tsearch (newp, &attr->cu->locs_tree, loc_compare);
> +      mutex_unlock (attr->cu->intern_lock);
>      }
>  
>    assert ((*found)->nloc == 1);

OK, so intern_lock now covers both tfind and tsearch (again, do we need
the eu_ variants here? isn't that double locking now?)

> @@ -674,13 +685,17 @@ getlocation (struct Dwarf_CU *cu, const Dwarf_Block 
> *block,
>        return 0;
>      }
>  
> -  return __libdw_intern_expression (cu->dbg, cu->dbg->other_byte_order,
> -                                 cu->address_size, (cu->version == 2
> -                                                    ? cu->address_size
> -                                                    : cu->offset_size),
> -                                 &cu->locs_tree, block,
> -                                 false, false,
> -                                 llbuf, listlen, sec_index);
> +  mutex_lock (cu->intern_lock);
> +  int res = __libdw_intern_expression (cu->dbg, cu->dbg->other_byte_order,
> +                                    cu->address_size, (cu->version == 2
> +                                                       ? cu->address_size
> +                                                       : cu->offset_size),
> +                                    &cu->locs_tree, block,
> +                                    false, false,
> +                                    llbuf, listlen, sec_index);
> +  mutex_unlock (cu->intern_lock);
> +
> +  return res;
>  }

Ack, so cu intern_lock here instead of Dwarf_CFI one.

>  int
> diff --git a/libdw/dwarf_getmacros.c b/libdw/dwarf_getmacros.c
> index f1c831fa..80f652c3 100644
> --- a/libdw/dwarf_getmacros.c
> +++ b/libdw/dwarf_getmacros.c
> @@ -322,15 +322,29 @@ cache_op_table (Dwarf *dbg, int sec_index, Dwarf_Off 
> macoff,
>    if (found != NULL)
>      return *found;
>  
> +  mutex_lock (dbg->macro_lock);
> +
> +  found = eu_tfind (&fake, &dbg->macro_ops_tree, macro_op_compare);
> +  if (found != NULL)
> +    {
> +      mutex_unlock (dbg->macro_lock);
> +      return *found;
> +    }
> +
>    Dwarf_Macro_Op_Table *table = sec_index == IDX_debug_macro
>      ? get_table_for_offset (dbg, macoff, startp, endp, cudie)
>      : get_macinfo_table (dbg, macoff, cudie);
>  
>    if (table == NULL)
> -    return NULL;
> +    {
> +      mutex_unlock (dbg->macro_lock);
> +      return NULL;
> +    }
>  
>    Dwarf_Macro_Op_Table **ret = eu_tsearch (table, &dbg->macro_ops_tree,
>                                       macro_op_compare);
> +  mutex_unlock (dbg->macro_lock);
> +
>    if (unlikely (ret == NULL))
>      {
>        __libdw_seterrno (DWARF_E_NOMEM);

OK, so both tfind and tsearch now covered by the dbg->macro_lock
guarding the macro_ops_tree (eu_ variants still needed?)

> diff --git a/libdw/frame-cache.c b/libdw/frame-cache.c
> index 6c89858a..6f6f777e 100644
> --- a/libdw/frame-cache.c
> +++ b/libdw/frame-cache.c
> @@ -64,6 +64,7 @@ __libdw_destroy_frame_cache (Dwarf_CFI *cache)
>    eu_search_tree_fini (&cache->fde_tree, free_fde);
>    eu_search_tree_fini (&cache->cie_tree, free_cie);
>    eu_search_tree_fini (&cache->expr_tree, free_expr);
> +  mutex_fini (cache->lock);
>  
>    if (cache->ebl != NULL && cache->ebl != (void *) -1l)
>      ebl_closebackend (cache->ebl);

Ack.

> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index de80cd4e..77d0c113 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -269,7 +269,7 @@ struct Dwarf
>       __libdw_intern_next_unit.  */
>    mutex_define(, dwarf_lock);
>  
> -  /* Synchronize access to dwarf_macro_getsrcfiles.  */
> +  /* Synchronize access to dwarf_macro_getsrcfiles and cache_op_table.  */
>    mutex_define(, macro_lock);

Thanks for updating the comment.

>    /* Internal memory handling.  This is basically a simplified thread-local
> @@ -471,6 +471,10 @@ struct Dwarf_CU
>       Covers __libdw_str_offsets_base_off.  */
>    mutex_define(, str_off_base_lock);
>  
> +  /* Synchronize access to is_constant_offset.  Should also be held
> +     when calling __libdw_intern_expression with Dwarf_CU members.  */
> +  mutex_define(, intern_lock);
> +

Right. So maybe cross reference doc between here and Dwarf_CFI_s lock?

>    /* Memory boundaries of this CU.  */
>    void *startp;
>    void *endp;
> diff --git a/libdw/libdw_findcu.c b/libdw/libdw_findcu.c
> index 0e4dcc37..59267343 100644
> --- a/libdw/libdw_findcu.c
> +++ b/libdw/libdw_findcu.c
> @@ -181,6 +181,7 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
>    rwlock_init (newp->split_lock);
>    mutex_init (newp->src_lock);
>    mutex_init (newp->str_off_base_lock);
> +  mutex_init (newp->intern_lock);
>  
>    /* v4 debug type units have version == 4 and unit_type == DW_UT_type.  */
>    if (debug_types)

Ack.

> @@ -240,8 +241,6 @@ struct Dwarf_CU *
>  internal_function
>  __libdw_findcu (Dwarf *dbg, Dwarf_Off start, bool v4_debug_types)
>  {
> -  mutex_lock (dbg->dwarf_lock);
> -
>    search_tree *tree = v4_debug_types ? &dbg->tu_tree : &dbg->cu_tree;
>    Dwarf_Off *next_offset
>      = v4_debug_types ? &dbg->next_tu_offset : &dbg->next_cu_offset;
> @@ -250,6 +249,12 @@ __libdw_findcu (Dwarf *dbg, Dwarf_Off start, bool 
> v4_debug_types)
>    struct Dwarf_CU fake = { .start = start, .end = 0 };
>    struct Dwarf_CU **found = eu_tfind (&fake, tree, findcu_cb);
>    struct Dwarf_CU *result = NULL;
> +  if (found != NULL)
> +    return *found;
> +
> +  mutex_lock (dbg->dwarf_lock);
> +
> +  found = eu_tfind (&fake, tree, findcu_cb);
>    if (found != NULL)
>      {
>        mutex_unlock (dbg->dwarf_lock);

OK, so in this case you do need the eu_tfind variant because both calls
need to be guarded. The second call just has one more extra lock around
it (the dwarf_lock) that also covers the possible eu_tsearch call.

Thanks,

Mark

Reply via email to