Hi Heather,
On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> From: Heather McIntyre <[email protected]>
>
> * (try_split_file): Use eu_tsearch.
> Add lock for cu->split.
>
> Signed-off-by: Heather S. McIntyre <[email protected]>
> Signed-off-by: Mark Wielaard <[email protected]>
> ---
> libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
> 1 file changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> index 533f8072..a288e9bd 100644
> --- a/libdw/libdw_find_split_unit.c
> +++ b/libdw/libdw_find_split_unit.c
> @@ -34,13 +34,19 @@
> #include "libelfP.h"
>
> #include <limits.h>
> -#include <search.h>
> +#include <eu-search.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
>
> +/* __libdw_link_skel_split() modifies "skel->split = split"
> + and "split->split = skel".
> + "cu->split" is read at multiple locations and conditionally updated.
> + Mutual exclusion is enforced to prevent a race. */
> +rwlock_define(static, cu_split_lock);
__libdw_link_skel_split is defined in libdw/libdwP.h and is called in
try_split_file. (It is also called in src/readelf.c but that is a
terrible hack, so ignore that for now.)
A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
(Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
cu->split is NULL it means the split dwarf was searched for, but not
found.
> static void
> try_split_file (Dwarf_CU *cu, const char *dwo_path)
> {
> @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> if (split->unit_type == DW_UT_split_compile
> && cu->unit_id8 == split->unit_id8)
> {
> - if (tsearch (split->dbg, &cu->dbg->split_tree,
> + if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
> __libdw_finddbg_cb) == NULL)
> {
> /* Something went wrong. Don't link. */
> @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> break;
> }
>
> + rwlock_wrlock(cu_split_lock);
> +
> /* Link skeleton and split compile units. */
> __libdw_link_skel_split (cu, split);
>
> + rwlock_unlock(cu_split_lock);
> +
Is this locking wide enough? It seems like multiple threads can race
to this code and set different Dwarfs (which means they'll leak). See
below.
> /* We have everything we need from this ELF
> file. And we are going to close the fd to
> not run out of file descriptors. */
> @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> break;
> }
> }
> +
> + rwlock_rdlock(cu_split_lock);
> +
> if (cu->split == (Dwarf_CU *) -1)
> dwarf_end (split_dwarf);
> +
> + rwlock_unlock(cu_split_lock);
This means __libdw_link_skel_split wasn't called above (so the
split_dwarf created by dwarf_begin didn't contain the expected CU).
> }
> /* Always close, because we don't want to run out of file
> descriptors. See also the elf_fcntl ELF_C_FDDONE call
> @@ -89,9 +104,13 @@ Dwarf_CU *
> internal_function
> __libdw_find_split_unit (Dwarf_CU *cu)
> {
> + rwlock_rdlock(cu_split_lock);
> + Dwarf_CU *cu_split_local = cu->split;
> + rwlock_unlock(cu_split_lock);
> +
> /* Only try once. */
> - if (cu->split != (Dwarf_CU *) -1)
> - return cu->split;
> + if (cu_split_local != (Dwarf_CU *) -1)
> + return cu_split_local;
>
> /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
> The split unit will be the first in the dwo file and should have the
> @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> free (dwo_path);
> }
>
> - if (cu->split == (Dwarf_CU *) -1)
> + rwlock_rdlock(cu_split_lock);
> + cu_split_local = cu->split;
> + rwlock_unlock(cu_split_lock);
> +
> + if (cu_split_local == (Dwarf_CU *) -1)
> {
> /* Try compdir plus dwo_name. */
> Dwarf_Attribute compdir;
It looks like multiple threads can end up here after having seen
cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
threads will enter try_split_file and might all call
__libdw_link_skel_split as mentioned above.
> @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> }
> }
>
> + rwlock_wrlock(cu_split_lock);
> +
> /* If we found nothing, make sure we don't try again. */
> if (cu->split == (Dwarf_CU *) -1)
> - cu->split = NULL;
> + {
> + cu->split = NULL;
> + cu_split_local = cu->split;
> + }
>
> - return cu->split;
> + rwlock_unlock(cu_split_lock);
> + return cu_split_local;
> }
I think it would be better to keep the lock during the whole
__libdw_find_split_unit call.
Cheers,
Mark