Currently each archive descriptor maintains a single Elf_Arhdr for the
current archive member (as determined by elf_next or elf_rand) which is
returned by elf_getarhdr.

A single per-archive Elf_Arhdr is not ideal since elf_next and elf_rand
can invalidate an archive member's reference to its own Elf_Arhdr.

Avoid this possible invalidation by storing each Elf_Arhdr in its
archive member descriptor.

The existing Elf_Arhdr parsing and storage in the archive descriptor
internal state is left mostly untouched.  When an archive member is
created with elf_begin it is given its own copy of its Elf_Arhdr from
the archive descriptor.

Also update tests/arextract.c with verification that each Elf_Arhdr
is distinct and remains valid after calls to elf_next that would
have previously invalidated the Elf_Arhdr.

Signed-off-by: Aaron Merey <ame...@redhat.com>

---
v2: Add new static function copy_arhdr to elf_begin.c to handle
allocation of duplicate ar_name and ar_rawname.  Also add testcase
to run-arextract.sh verifying that each Elf_Arhdr has a unique
ar_name and ar_rawname.

On Tue, Jul 15, 2025 at 12:19 PM Mark Wielaard <m...@klomp.org> wrote:
> > +  /* Per-descriptor copy of the structure returned by 'elf_getarhdr'.  */
> > +  Elf_Arhdr elf_ar_hdr;
> > +
> >    /* Lock to handle multithreaded programs.  */
> >    rwlock_define (,lock);
>
> So this adds an Elf_Arhdr to all Elfs.
>
> Three questions:
> - Do you still need an elf_ar_hdr field in the state union for ar?
> - Should this go into the the elf and/or elf32/64 state union
>   structs instead?
> - What about the state.ar.ar_name and state.ar_rawname fields?
>   Those are pointed to from the elf_ar_hdr fields ar_name and raw_name.
>   So shouldn't they also be held as top-level or elf[32|64] field
>   states?

1. I kept state.ar.elf_ar_hdr because the archive descriptor still needs
to keep its own copy of the current header in between elf_next reading
the next header and elf_begin creating the new descriptor for the
archive member.

2. I've moved the archive member copy of Elf_Arhdr to
state.[elf|elf32|elf64].

3. Archive members now have their own separate copies of ar_name and
ar_rawname.  The new testcase verifies this.

Now that we can keep multiple archive member descriptors open at once,
we can improve how elf_next, elf_begin, elf_rand and elf_clone behave
with archive member descriptors.

Currently elf_clone does not work with archive members.  elf_begin will
always allocate a new archive member instead of increasing an existing
member's ref_count when possible.  elf_next advances the parent
descriptor's state to the next archive header instead of advancing to
the header following the member elf_next was called with.

We have already discussed changing elf_next so that it advances the
parent's state to one-past the member that elf_next is called with.
This would involve adding an arhdr offset to state.[elf|elf32|elf64].
Do we also want to change elf_clone and elf_begin as described above?
The elf_clone change would enable eu-strip of archive members, which
is currently not supported.

 libelf/elf_begin.c    | 60 ++++++++++++++++++++++++++++++++++++-
 libelf/elf_end.c      |  9 ++++++
 libelf/elf_getarhdr.c | 22 ++------------
 libelf/libelfP.h      |  6 +++-
 tests/arextract.c     | 70 ++++++++++++++++++++++++++++++++++++++-----
 5 files changed, 137 insertions(+), 30 deletions(-)

diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c
index 3ed1f8d7..7e722ea6 100644
--- a/libelf/elf_begin.c
+++ b/libelf/elf_begin.c
@@ -815,6 +815,53 @@ read_long_names (Elf *elf)
 }
 
 
+/* Copy archive header from parent archive ref to member descriptor elf.  */
+static int
+copy_arhdr (Elf_Arhdr *dest, Elf *ref)
+{
+  Elf_Arhdr *hdr;
+
+  if (ref->kind == ELF_K_AR)
+    hdr = &ref->state.ar.elf_ar_hdr;
+  else
+    hdr = &ref->state.elf.elf_ar_hdr;
+
+  char *ar_name = hdr->ar_name;
+  char *ar_rawname = hdr->ar_rawname;
+  if (ar_name == NULL || ar_rawname == NULL)
+    {
+      /* ref doesn't have an Elf_Arhdr or it was marked as unusable.  */
+      return 0;
+    }
+
+  /* Allocate copies of ar_name and ar_rawname.  */
+  size_t name_len = strlen (ar_name) + 1;
+  char *name_copy = malloc (MAX (name_len, 16));
+  if (name_copy == NULL)
+    {
+      __libelf_seterrno (ELF_E_NOMEM);
+      return -1;
+    }
+  memcpy (name_copy, ar_name, name_len);
+
+  size_t rawname_len = strlen (ar_rawname) + 1;
+  char *rawname_copy = malloc (MAX (rawname_len, 17));
+  if (rawname_copy == NULL)
+    {
+      free (name_copy);
+      __libelf_seterrno (ELF_E_NOMEM);
+      return -1;
+    }
+  memcpy (rawname_copy, ar_rawname, rawname_len);
+
+  *dest = *hdr;
+  dest->ar_name = name_copy;
+  dest->ar_rawname = rawname_copy;
+
+  return 0;
+}
+
+
 /* Read the next archive header.  */
 int
 internal_function
@@ -1060,17 +1107,28 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
     /* Something went wrong.  Maybe there is no member left.  */
     return NULL;
 
+  Elf_Arhdr ar_hdr = {0};
+  if (copy_arhdr (&ar_hdr, ref) != 0)
+    /* Out of memory.  */
+    return NULL;
+
   /* We have all the information we need about the next archive member.
      Now create a descriptor for it.  */
   result = read_file (fildes, ref->state.ar.offset + sizeof (struct ar_hdr),
                      ref->state.ar.elf_ar_hdr.ar_size, cmd, ref);
 
-  /* Enlist this new descriptor in the list of children.  */
   if (result != NULL)
     {
+      /* Enlist this new descriptor in the list of children.  */
       result->next = ref->state.ar.children;
+      result->state.elf.elf_ar_hdr = ar_hdr;
       ref->state.ar.children = result;
     }
+  else
+    {
+      free (ar_hdr.ar_name);
+      free (ar_hdr.ar_rawname);
+    }
 
   return result;
 }
diff --git a/libelf/elf_end.c b/libelf/elf_end.c
index da8f3a20..1d366127 100644
--- a/libelf/elf_end.c
+++ b/libelf/elf_end.c
@@ -116,6 +116,15 @@ elf_end (Elf *elf)
       rwlock_unlock (parent->lock);
     }
 
+  if (elf->kind != ELF_K_AR)
+    {
+      if (elf->state.elf.elf_ar_hdr.ar_name != NULL)
+       free (elf->state.elf.elf_ar_hdr.ar_name);
+
+      if (elf->state.elf.elf_ar_hdr.ar_rawname != NULL)
+       free (elf->state.elf.elf_ar_hdr.ar_rawname);
+    }
+
   /* This was the last activation.  Free all resources.  */
   switch (elf->kind)
     {
diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c
index 509f1da5..9211fc2e 100644
--- a/libelf/elf_getarhdr.c
+++ b/libelf/elf_getarhdr.c
@@ -44,30 +44,12 @@ elf_getarhdr (Elf *elf)
   if (elf == NULL)
     return NULL;
 
-  Elf *parent = elf->parent;
-
   /* Calling this function is not ok for any file type but archives.  */
-  if (parent == NULL)
+  if (elf->parent == NULL || elf->parent->kind != ELF_K_AR)
     {
       __libelf_seterrno (ELF_E_INVALID_OP);
       return NULL;
     }
 
-  /* Make sure we have read the archive header.  */
-  if (parent->state.ar.elf_ar_hdr.ar_name == NULL
-      && __libelf_next_arhdr_wrlock (parent) != 0)
-    {
-      rwlock_wrlock (parent->lock);
-      int st = __libelf_next_arhdr_wrlock (parent);
-      rwlock_unlock (parent->lock);
-
-      if (st != 0)
-       /* Something went wrong.  Maybe there is no member left.  */
-       return NULL;
-    }
-
-  /* We can be sure the parent is an archive.  */
-  assert (parent->kind == ELF_K_AR);
-
-  return &parent->state.ar.elf_ar_hdr;
+  return &elf->state.elf.elf_ar_hdr;
 }
diff --git a/libelf/libelfP.h b/libelf/libelfP.h
index 66e7e4dd..1b93da88 100644
--- a/libelf/libelfP.h
+++ b/libelf/libelfP.h
@@ -323,6 +323,7 @@ struct Elf
                                   read from the file.  */
       search_tree rawchunk_tree;  /* Tree and lock for elf_getdata_rawchunk
                                     results.  */
+      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
       unsigned int scnincr;    /* Number of sections allocate the last
                                   time.  */
       int ehdr_flags;          /* Flags (dirty) for ELF header.  */
@@ -343,6 +344,7 @@ struct Elf
                                   read from the file.  */
       search_tree rawchunk_tree;  /* Tree and lock for
                                     elf_getdata_rawchunk results.  */
+      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
       unsigned int scnincr;    /* Number of sections allocate the last
                                   time.  */
       int ehdr_flags;          /* Flags (dirty) for ELF header.  */
@@ -369,6 +371,7 @@ struct Elf
                                   read from the file.  */
       search_tree rawchunk_tree;  /* Tree and lock for
                                     elf_getdata_rawchunk results.  */
+      Elf_Arhdr elf_ar_hdr;     /* Structure returned by 'elf_getarhdr'.  */
       unsigned int scnincr;    /* Number of sections allocate the last
                                   time.  */
       int ehdr_flags;          /* Flags (dirty) for ELF header.  */
@@ -394,7 +397,8 @@ struct Elf
       int64_t offset;          /* Offset in file we are currently at.
                                   elf_next() advances this to the next
                                   member of the archive.  */
-      Elf_Arhdr elf_ar_hdr;    /* Structure returned by 'elf_getarhdr'.  */
+      Elf_Arhdr elf_ar_hdr;     /* Copy of current archive member's structure
+                                  returned by 'elf_getarhdr'.  */
       struct ar_hdr ar_hdr;    /* Header read from file.  */
       char ar_name[16];                /* NUL terminated ar_name of 
elf_ar_hdr.  */
       char raw_name[17];       /* This is a buffer for the NUL terminated
diff --git a/tests/arextract.c b/tests/arextract.c
index 936d7f55..fced8827 100644
--- a/tests/arextract.c
+++ b/tests/arextract.c
@@ -27,6 +27,13 @@
 #include <unistd.h>
 #include <system.h>
 
+typedef struct hdr_node {
+    Elf *elf;
+    Elf_Arhdr *hdr;
+    struct hdr_node *next;
+} hdr_node;
+
+hdr_node *hdr_list = NULL;
 
 int
 main (int argc, char *argv[])
@@ -80,6 +87,27 @@ main (int argc, char *argv[])
          exit (1);
        }
 
+        /* Keep a list of subelfs and their Elf_Arhdr.  This is used to
+           verifiy that each archive member descriptor stores its own
+           Elf_Ahdr as opposed to the archive descriptor storing one
+           Elf_Ahdr at a time for all archive members.  */
+        hdr_node *node = calloc (1, sizeof (hdr_node));
+        if (node == NULL)
+          {
+            printf ("calloc failed: %s\n", strerror (errno));
+            exit (1);
+          }
+        node->elf = subelf;
+        node->hdr = arhdr;
+
+        if (hdr_list != NULL)
+          {
+           node->next = hdr_list;
+            hdr_list = node;
+          }
+       else
+          hdr_list = node;
+
       if (strcmp (arhdr->ar_name, argv[2]) == 0)
        {
          int outfd;
@@ -128,8 +156,40 @@ Failed to get base address for the archive element: %s\n",
              exit (1);
            }
 
-         /* Close the descriptors.  */
-         if (elf_end (subelf) != 0 || elf_end (elf) != 0)
+         /* Verify each subelf descriptor contains a unique copy of its arhdr
+            and then close each subelf descriptor.  */
+         hdr_node *cur;
+         while ((cur = hdr_list) != NULL)
+           {
+             /* Verify that arhdr names are unique. */
+             for (hdr_node *n = cur->next; n != NULL; n = n->next)
+               {
+                 if (strcmp (cur->hdr->ar_name, n->hdr->ar_name) == 0)
+                   {
+                     puts ("Duplicate ar_name");
+                     exit (1);
+                   }
+
+                 if (strcmp (cur->hdr->ar_rawname, n->hdr->ar_rawname) == 0)
+                   {
+                     puts ("Duplicate ar_rawname");
+                     exit (1);
+                   }
+               }
+
+             if (elf_end (cur->elf) != 0)
+               {
+                 printf ("Error while freeing subELF descriptor: %s\n",
+                         elf_errmsg (-1));
+                 exit (1);
+               }
+
+             hdr_list = cur->next;
+             free (cur);
+           }
+
+         /* Close the archive descriptor.  */
+         if (elf_end (elf) != 0)
            {
              printf ("Freeing ELF descriptors failed: %s", elf_errmsg (-1));
              exit (1);
@@ -144,12 +204,6 @@ Failed to get base address for the archive element: %s\n",
 
       /* Get next archive element.  */
       cmd = elf_next (subelf);
-      if (elf_end (subelf) != 0)
-       {
-         printf ("error while freeing sub-ELF descriptor: %s\n",
-                 elf_errmsg (-1));
-         exit (1);
-       }
     }
 
   /* When we reach this point we haven't found the given file in the
-- 
2.50.1

Reply via email to