Currently each archive descriptor maintains a single Elf_Arhdr for the current archive member (as determined by elf_next or elf_rand) which is returned by elf_getarhdr.
A single per-archive Elf_Arhdr is not ideal since elf_next and elf_rand can invalidate an archive member's reference to its own Elf_Arhdr. Avoid this possible invalidation by storing each Elf_Arhdr in its archive member descriptor. The existing Elf_Arhdr parsing and storage in the archive descriptor internal state is left mostly untouched. When an archive member is created with elf_begin it is given its own copy of its Elf_Arhdr from the archive descriptor. Also update tests/arextract.c with verification that each Elf_Arhdr is distinct and remains valid after calls to elf_next that would have previously invalidated the Elf_Arhdr. Signed-off-by: Aaron Merey <ame...@redhat.com> --- v2: Add new static function copy_arhdr to elf_begin.c to handle allocation of duplicate ar_name and ar_rawname. Also add testcase to run-arextract.sh verifying that each Elf_Arhdr has a unique ar_name and ar_rawname. On Tue, Jul 15, 2025 at 12:19 PM Mark Wielaard <m...@klomp.org> wrote: > > + /* Per-descriptor copy of the structure returned by 'elf_getarhdr'. */ > > + Elf_Arhdr elf_ar_hdr; > > + > > /* Lock to handle multithreaded programs. */ > > rwlock_define (,lock); > > So this adds an Elf_Arhdr to all Elfs. > > Three questions: > - Do you still need an elf_ar_hdr field in the state union for ar? > - Should this go into the the elf and/or elf32/64 state union > structs instead? > - What about the state.ar.ar_name and state.ar_rawname fields? > Those are pointed to from the elf_ar_hdr fields ar_name and raw_name. > So shouldn't they also be held as top-level or elf[32|64] field > states? 1. I kept state.ar.elf_ar_hdr because the archive descriptor still needs to keep its own copy of the current header in between elf_next reading the next header and elf_begin creating the new descriptor for the archive member. 2. I've moved the archive member copy of Elf_Arhdr to state.[elf|elf32|elf64]. 3. Archive members now have their own separate copies of ar_name and ar_rawname. The new testcase verifies this. Now that we can keep multiple archive member descriptors open at once, we can improve how elf_next, elf_begin, elf_rand and elf_clone behave with archive member descriptors. Currently elf_clone does not work with archive members. elf_begin will always allocate a new archive member instead of increasing an existing member's ref_count when possible. elf_next advances the parent descriptor's state to the next archive header instead of advancing to the header following the member elf_next was called with. We have already discussed changing elf_next so that it advances the parent's state to one-past the member that elf_next is called with. This would involve adding an arhdr offset to state.[elf|elf32|elf64]. Do we also want to change elf_clone and elf_begin as described above? The elf_clone change would enable eu-strip of archive members, which is currently not supported. libelf/elf_begin.c | 60 ++++++++++++++++++++++++++++++++++++- libelf/elf_end.c | 9 ++++++ libelf/elf_getarhdr.c | 22 ++------------ libelf/libelfP.h | 6 +++- tests/arextract.c | 70 ++++++++++++++++++++++++++++++++++++++----- 5 files changed, 137 insertions(+), 30 deletions(-) diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c index 3ed1f8d7..7e722ea6 100644 --- a/libelf/elf_begin.c +++ b/libelf/elf_begin.c @@ -815,6 +815,53 @@ read_long_names (Elf *elf) } +/* Copy archive header from parent archive ref to member descriptor elf. */ +static int +copy_arhdr (Elf_Arhdr *dest, Elf *ref) +{ + Elf_Arhdr *hdr; + + if (ref->kind == ELF_K_AR) + hdr = &ref->state.ar.elf_ar_hdr; + else + hdr = &ref->state.elf.elf_ar_hdr; + + char *ar_name = hdr->ar_name; + char *ar_rawname = hdr->ar_rawname; + if (ar_name == NULL || ar_rawname == NULL) + { + /* ref doesn't have an Elf_Arhdr or it was marked as unusable. */ + return 0; + } + + /* Allocate copies of ar_name and ar_rawname. */ + size_t name_len = strlen (ar_name) + 1; + char *name_copy = malloc (MAX (name_len, 16)); + if (name_copy == NULL) + { + __libelf_seterrno (ELF_E_NOMEM); + return -1; + } + memcpy (name_copy, ar_name, name_len); + + size_t rawname_len = strlen (ar_rawname) + 1; + char *rawname_copy = malloc (MAX (rawname_len, 17)); + if (rawname_copy == NULL) + { + free (name_copy); + __libelf_seterrno (ELF_E_NOMEM); + return -1; + } + memcpy (rawname_copy, ar_rawname, rawname_len); + + *dest = *hdr; + dest->ar_name = name_copy; + dest->ar_rawname = rawname_copy; + + return 0; +} + + /* Read the next archive header. */ int internal_function @@ -1060,17 +1107,28 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref) /* Something went wrong. Maybe there is no member left. */ return NULL; + Elf_Arhdr ar_hdr = {0}; + if (copy_arhdr (&ar_hdr, ref) != 0) + /* Out of memory. */ + return NULL; + /* We have all the information we need about the next archive member. Now create a descriptor for it. */ result = read_file (fildes, ref->state.ar.offset + sizeof (struct ar_hdr), ref->state.ar.elf_ar_hdr.ar_size, cmd, ref); - /* Enlist this new descriptor in the list of children. */ if (result != NULL) { + /* Enlist this new descriptor in the list of children. */ result->next = ref->state.ar.children; + result->state.elf.elf_ar_hdr = ar_hdr; ref->state.ar.children = result; } + else + { + free (ar_hdr.ar_name); + free (ar_hdr.ar_rawname); + } return result; } diff --git a/libelf/elf_end.c b/libelf/elf_end.c index da8f3a20..1d366127 100644 --- a/libelf/elf_end.c +++ b/libelf/elf_end.c @@ -116,6 +116,15 @@ elf_end (Elf *elf) rwlock_unlock (parent->lock); } + if (elf->kind != ELF_K_AR) + { + if (elf->state.elf.elf_ar_hdr.ar_name != NULL) + free (elf->state.elf.elf_ar_hdr.ar_name); + + if (elf->state.elf.elf_ar_hdr.ar_rawname != NULL) + free (elf->state.elf.elf_ar_hdr.ar_rawname); + } + /* This was the last activation. Free all resources. */ switch (elf->kind) { diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c index 509f1da5..9211fc2e 100644 --- a/libelf/elf_getarhdr.c +++ b/libelf/elf_getarhdr.c @@ -44,30 +44,12 @@ elf_getarhdr (Elf *elf) if (elf == NULL) return NULL; - Elf *parent = elf->parent; - /* Calling this function is not ok for any file type but archives. */ - if (parent == NULL) + if (elf->parent == NULL || elf->parent->kind != ELF_K_AR) { __libelf_seterrno (ELF_E_INVALID_OP); return NULL; } - /* Make sure we have read the archive header. */ - if (parent->state.ar.elf_ar_hdr.ar_name == NULL - && __libelf_next_arhdr_wrlock (parent) != 0) - { - rwlock_wrlock (parent->lock); - int st = __libelf_next_arhdr_wrlock (parent); - rwlock_unlock (parent->lock); - - if (st != 0) - /* Something went wrong. Maybe there is no member left. */ - return NULL; - } - - /* We can be sure the parent is an archive. */ - assert (parent->kind == ELF_K_AR); - - return &parent->state.ar.elf_ar_hdr; + return &elf->state.elf.elf_ar_hdr; } diff --git a/libelf/libelfP.h b/libelf/libelfP.h index 66e7e4dd..1b93da88 100644 --- a/libelf/libelfP.h +++ b/libelf/libelfP.h @@ -323,6 +323,7 @@ struct Elf read from the file. */ search_tree rawchunk_tree; /* Tree and lock for elf_getdata_rawchunk results. */ + Elf_Arhdr elf_ar_hdr; /* Structure returned by 'elf_getarhdr'. */ unsigned int scnincr; /* Number of sections allocate the last time. */ int ehdr_flags; /* Flags (dirty) for ELF header. */ @@ -343,6 +344,7 @@ struct Elf read from the file. */ search_tree rawchunk_tree; /* Tree and lock for elf_getdata_rawchunk results. */ + Elf_Arhdr elf_ar_hdr; /* Structure returned by 'elf_getarhdr'. */ unsigned int scnincr; /* Number of sections allocate the last time. */ int ehdr_flags; /* Flags (dirty) for ELF header. */ @@ -369,6 +371,7 @@ struct Elf read from the file. */ search_tree rawchunk_tree; /* Tree and lock for elf_getdata_rawchunk results. */ + Elf_Arhdr elf_ar_hdr; /* Structure returned by 'elf_getarhdr'. */ unsigned int scnincr; /* Number of sections allocate the last time. */ int ehdr_flags; /* Flags (dirty) for ELF header. */ @@ -394,7 +397,8 @@ struct Elf int64_t offset; /* Offset in file we are currently at. elf_next() advances this to the next member of the archive. */ - Elf_Arhdr elf_ar_hdr; /* Structure returned by 'elf_getarhdr'. */ + Elf_Arhdr elf_ar_hdr; /* Copy of current archive member's structure + returned by 'elf_getarhdr'. */ struct ar_hdr ar_hdr; /* Header read from file. */ char ar_name[16]; /* NUL terminated ar_name of elf_ar_hdr. */ char raw_name[17]; /* This is a buffer for the NUL terminated diff --git a/tests/arextract.c b/tests/arextract.c index 936d7f55..fced8827 100644 --- a/tests/arextract.c +++ b/tests/arextract.c @@ -27,6 +27,13 @@ #include <unistd.h> #include <system.h> +typedef struct hdr_node { + Elf *elf; + Elf_Arhdr *hdr; + struct hdr_node *next; +} hdr_node; + +hdr_node *hdr_list = NULL; int main (int argc, char *argv[]) @@ -80,6 +87,27 @@ main (int argc, char *argv[]) exit (1); } + /* Keep a list of subelfs and their Elf_Arhdr. This is used to + verifiy that each archive member descriptor stores its own + Elf_Ahdr as opposed to the archive descriptor storing one + Elf_Ahdr at a time for all archive members. */ + hdr_node *node = calloc (1, sizeof (hdr_node)); + if (node == NULL) + { + printf ("calloc failed: %s\n", strerror (errno)); + exit (1); + } + node->elf = subelf; + node->hdr = arhdr; + + if (hdr_list != NULL) + { + node->next = hdr_list; + hdr_list = node; + } + else + hdr_list = node; + if (strcmp (arhdr->ar_name, argv[2]) == 0) { int outfd; @@ -128,8 +156,40 @@ Failed to get base address for the archive element: %s\n", exit (1); } - /* Close the descriptors. */ - if (elf_end (subelf) != 0 || elf_end (elf) != 0) + /* Verify each subelf descriptor contains a unique copy of its arhdr + and then close each subelf descriptor. */ + hdr_node *cur; + while ((cur = hdr_list) != NULL) + { + /* Verify that arhdr names are unique. */ + for (hdr_node *n = cur->next; n != NULL; n = n->next) + { + if (strcmp (cur->hdr->ar_name, n->hdr->ar_name) == 0) + { + puts ("Duplicate ar_name"); + exit (1); + } + + if (strcmp (cur->hdr->ar_rawname, n->hdr->ar_rawname) == 0) + { + puts ("Duplicate ar_rawname"); + exit (1); + } + } + + if (elf_end (cur->elf) != 0) + { + printf ("Error while freeing subELF descriptor: %s\n", + elf_errmsg (-1)); + exit (1); + } + + hdr_list = cur->next; + free (cur); + } + + /* Close the archive descriptor. */ + if (elf_end (elf) != 0) { printf ("Freeing ELF descriptors failed: %s", elf_errmsg (-1)); exit (1); @@ -144,12 +204,6 @@ Failed to get base address for the archive element: %s\n", /* Get next archive element. */ cmd = elf_next (subelf); - if (elf_end (subelf) != 0) - { - printf ("error while freeing sub-ELF descriptor: %s\n", - elf_errmsg (-1)); - exit (1); - } } /* When we reach this point we haven't found the given file in the -- 2.50.1