Hi Aaron, On Tue, 2025-07-15 at 00:25 -0400, Aaron Merey wrote: > Currently each archive descriptor maintains a single Elf_Arhdr for the > current archive member (as determined by elf_next or elf_rand) which is > returned by elf_getarhdr. > > A single per-archive Elf_Arhdr is not ideal since elf_next and elf_rand > can invalidate an archive member's reference to its own Elf_Arhdr. > > Avoid this possible invalidation by storing each Elf_Arhdr in its > archive member descriptor. > > The existing Elf_Arhdr parsing and storage in the archive descriptor > internal state is left mostly untouched. When an archive member is > created with elf_begin it is given its own copy of its Elf_Arhdr from > the archive descriptor. > > Also update tests/arextract.c with verification that each Elf_Arhdr > is distinct and remains valid after calls to elf_next that would > have previously invalidated the Elf_Arhdr.
Nice. I like the design, but some questions below. > diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c > index 3ed1f8d7..5ed5aaa3 100644 > --- a/libelf/elf_begin.c > +++ b/libelf/elf_begin.c > @@ -1065,11 +1065,14 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref) > result = read_file (fildes, ref->state.ar.offset + sizeof (struct ar_hdr), > ref->state.ar.elf_ar_hdr.ar_size, cmd, ref); > > - /* Enlist this new descriptor in the list of children. */ > if (result != NULL) > { > + /* Enlist this new descriptor in the list of children. */ > result->next = ref->state.ar.children; > ref->state.ar.children = result; > + > + /* Ensure the member descriptor has its own copy of its header info. > */ > + result->elf_ar_hdr = ref->state.ar.elf_ar_hdr; > } > > return result; Shouldn't this be result->elf_ar_hdr = ref->elf_ar_hdr. See also below, the main question is if state.ar should still have an elf_ar_hdr field itself. > diff --git a/libelf/elf_clone.c b/libelf/elf_clone.c > index e6fe4729..d6c8d541 100644 > --- a/libelf/elf_clone.c > +++ b/libelf/elf_clone.c > @@ -69,6 +69,7 @@ elf_clone (Elf *elf, Elf_Cmd cmd) > == offsetof (struct Elf, state.elf64.scns)); > retval->state.elf.scns_last = &retval->state.elf32.scns; > retval->state.elf32.scns.max = elf->state.elf32.scns.max; > + retval->elf_ar_hdr = elf->elf_ar_hdr; > > retval->class = elf->class; > } Right, this is what I was expecting in elfdup above too. > diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c > index 509f1da5..ec85fa71 100644 > --- a/libelf/elf_getarhdr.c > +++ b/libelf/elf_getarhdr.c > @@ -44,30 +44,12 @@ elf_getarhdr (Elf *elf) > if (elf == NULL) > return NULL; > > - Elf *parent = elf->parent; > - > /* Calling this function is not ok for any file type but archives. */ > - if (parent == NULL) > + if (elf->parent == NULL) > { > __libelf_seterrno (ELF_E_INVALID_OP); > return NULL; > } Should this be if (elf->parent == NULL || elf->parent->kind != ELF_K_AR) Below we do have an assert, so maybe this suggestion is wrong. > - /* Make sure we have read the archive header. */ > - if (parent->state.ar.elf_ar_hdr.ar_name == NULL > - && __libelf_next_arhdr_wrlock (parent) != 0) > - { > - rwlock_wrlock (parent->lock); > - int st = __libelf_next_arhdr_wrlock (parent); > - rwlock_unlock (parent->lock); > - > - if (st != 0) > - /* Something went wrong. Maybe there is no member left. */ > - return NULL; > - } > - > - /* We can be sure the parent is an archive. */ > - assert (parent->kind == ELF_K_AR); Was this assert correct? Can an Elf only have an parent if it comes from an ELF_K_AR file? > - return &parent->state.ar.elf_ar_hdr; > + return &elf->elf_ar_hdr; > } > diff --git a/libelf/libelfP.h b/libelf/libelfP.h > index 66e7e4dd..20120ad3 100644 > --- a/libelf/libelfP.h > +++ b/libelf/libelfP.h > @@ -306,6 +306,9 @@ struct Elf > /* Reference counting for the descriptor. */ > int ref_count; > > + /* Per-descriptor copy of the structure returned by 'elf_getarhdr'. */ > + Elf_Arhdr elf_ar_hdr; > + > /* Lock to handle multithreaded programs. */ > rwlock_define (,lock); So this adds an Elf_Arhdr to all Elfs. Three questions: - Do you still need an elf_ar_hdr field in the state union for ar? - Should this go into the the elf and/or elf32/64 state union structs instead? - What about the state.ar.ar_name and state.ar_rawname fields? Those are pointed to from the elf_ar_hdr fields ar_name and raw_name. So shouldn't they also be held as top-level or elf[32|64] field states? > diff --git a/tests/arextract.c b/tests/arextract.c > index 936d7f55..7920d1c9 100644 > --- a/tests/arextract.c > +++ b/tests/arextract.c > @@ -21,12 +21,20 @@ > > #include <fcntl.h> > #include <gelf.h> > +#include <limits.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > #include <unistd.h> > #include <system.h> > > +typedef struct hdr_node { > + Elf *elf; > + Elf_Arhdr *hdr; > + struct hdr_node *next; > +} hdr_node; > + > +hdr_node *hdr_list = NULL; > > int > main (int argc, char *argv[]) > @@ -80,6 +88,27 @@ main (int argc, char *argv[]) > exit (1); > } > > + /* Keep a list of subelfs and their Elf_Arhdr. This is used to > + verifiy that each archive member descriptor stores its own > + Elf_Ahdr as opposed to the archive descriptor storing one > + Elf_Ahdr at a time for all archive members. */ > + hdr_node *node = calloc (1, sizeof (hdr_node)); > + if (node == NULL) > + { > + printf ("calloc failed: %s\n", strerror (errno)); > + exit (1); > + } > + node->elf = subelf; > + node->hdr = arhdr; > + > + if (hdr_list != NULL) > + { > + node->next = hdr_list; > + hdr_list = node; > + } > + else > + hdr_list = node; > + > if (strcmp (arhdr->ar_name, argv[2]) == 0) > { > int outfd; > @@ -128,8 +157,37 @@ Failed to get base address for the archive element: > %s\n", > exit (1); > } > > - /* Close the descriptors. */ > - if (elf_end (subelf) != 0 || elf_end (elf) != 0) > + /* Close each subelf descriptor. */ > + hdr_node *cur; > + while ((cur = hdr_list) != NULL) > + { > + /* Read arhdr names to help detect if there's a problem with the > + per-member Elf_Arhdr storage. */ > + if (memchr (cur->hdr->ar_name, '\0', PATH_MAX) == NULL) > + { > + puts ("ar_name missing null character"); > + exit (1); > + } > + > + if (memchr (cur->hdr->ar_rawname, '\0', PATH_MAX) == NULL) > + { > + puts ("ar_rawname missing null character"); > + exit (1); > + } I wonder if there is some way to check if the ar_name and/or ar_rawname are unique here? > + if (elf_end (cur->elf) != 0) > + { > + printf ("Error while freeing subELF descriptor: %s\n", > + elf_errmsg (-1)); > + exit (1); > + } > + > + hdr_list = cur->next; > + free (cur); > + } > + > + /* Close the archive descriptor. */ > + if (elf_end (elf) != 0) > { > printf ("Freeing ELF descriptors failed: %s", elf_errmsg (-1)); > exit (1); > > @@ -144,12 +202,6 @@ Failed to get base address for the archive element: > %s\n", > > /* Get next archive element. */ > cmd = elf_next (subelf); > - if (elf_end (subelf) != 0) > - { > - printf ("error while freeing sub-ELF descriptor: %s\n", > - elf_errmsg (-1)); > - exit (1); > - } > } > > /* When we reach this point we haven't found the given file in the Nice test. Cheers, Mark