On 2026-05-12 08:19, Pavel Cahyna wrote:
Resending the previous message with attached patch and an explanation of
the approach.

Thanks. Somehow the earlier message got aged out of my spam folder (the resent message was marked as spam too, presumably for the same reason; Amavis apparently dislikes that email - perhaps because of CVE-2022-41352, which ironically came about because Amavis was *not* using GNU paxutils!).

That old patch no longer applies due to intervening changes, so I made the minimal changes needed to make it apply, reindented it, and came up with the attached. I plan to look into this more carefully soon, with your comments in mind.
From d5a82f8f00d6163fef8fd8844d00e0861fe9ddb8 Mon Sep 17 00:00:00 2001
From: Pavel Cahyna <[email protected]>
Date: Tue, 12 May 2026 08:56:44 -0700
Subject: [PATCH] Draft patch for openat2 changes vs --one-top-level

The patch leverages the existing -C code. In order to do that, every
entry in the wd[] table gets another companion entry in the table that
represents the --one-top-level directory. There is one additional field
in each entry that allows skipping the companion entries if they are not
desired.

The actual directory is created lazily by chdir_do() if needed, as you
requested, to avoid empty "a/foo" after --one-top-level=foo -C a -C b.

The patch also fixes, as a side effect, extraction of hardlinks with
--one-top-level, which is currently broken in the typical case (the
transform is not applied to the target, so the hardlink is wrong).

The patch does not yet handle the --show-transformed case with
--one-top-level that you discussed in another subthread. As a result, two
tests now fail (onetop02.at and onetop04.at). I suppose that this would
be quite easy to fix.

Another issue that I am aware of is that I am not sure whether to call
repair_delayed_set_stat and/or delay_set_stat on the newly created
directories like extract_dir() does (the whole delay_set code is a bit
mysterious to me).

Use of --create together with --one-top-level should probably be
forbidden, as unlink.c uses wd[] in a way that will likely break in the
presence of companion entries (the chdir_do call in
flush_deferred_unlinks).
---
 src/common.h  |   5 +-
 src/extract.c |  93 +++++++++++++++++++++++++++++++++++---
 src/list.c    |  35 ++++++++++----
 src/misc.c    | 123 +++++++++++++++++++++++++++++++++++++++++++-------
 src/names.c   |  16 ++++---
 src/tar.c     |   5 +-
 src/unlink.c  |   8 ++--
 src/update.c  |   2 +-
 8 files changed, 237 insertions(+), 50 deletions(-)

diff --git a/src/common.h b/src/common.h
index e835d888..4a708ee5 100644
--- a/src/common.h
+++ b/src/common.h
@@ -558,6 +558,7 @@ void verify_volume (void);
 extern dev_t root_device;
 
 void extr_init (void);
+bool create_dir (char const *file_name);
 void extract_archive (void);
 void extract_finish (void);
 bool rename_directory (char *src, char *dst);
@@ -778,8 +779,8 @@ idx_t blocking_write (int fd, void const *buf, idx_t count);
 enum { BADFD = AT_FDCWD == -1 ? -2 : -1 };
 
 extern idx_t chdir_current;
-idx_t chdir_arg (char const *dir);
-void chdir_do (idx_t dir);
+idx_t chdir_arg (char const *dir, bool one_top_level);
+void chdir_do (idx_t dir, bool create);
 struct chdir_id { int err; dev_t st_dev; ino_t st_ino; } chdir_id (void);
 struct fdbase fdbase (char const *);
 struct fdbase fdbase1 (char const *);
diff --git a/src/extract.c b/src/extract.c
index 5cc3ccd6..2ad7806d 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -216,6 +216,8 @@ static Hash_table *delayed_link_table;
 static struct delayed_link *delayed_link_head;
 static struct delayed_link **delayed_link_tail = &delayed_link_head;
 
+static bool one_top_level_prepare = false;
+
 struct string_list
   {
     struct string_list *next;
@@ -815,9 +817,10 @@ make_directories (char *file_name, bool *interdir_made)
 	  /* Create a struct delayed_set_stat even if
 	     mode == desired_mode, because
 	     repair_delayed_set_stat may need to update the struct.  */
-	  delay_set_stat (file_name,
-			  NULL, mode & ~ current_umask, MODE_RWX,
-			  desired_mode, AT_SYMLINK_NOFOLLOW);
+	  if (! one_top_level_prepare)
+	    delay_set_stat (file_name,
+			    NULL, mode & ~ current_umask, MODE_RWX,
+			    desired_mode, AT_SYMLINK_NOFOLLOW);
 	  if (interdir_made)
 	    *interdir_made = true;
 	  print_for_mkdir (file_name, desired_mode);
@@ -1049,7 +1052,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool metadata_set)
 	      && memeq (file_name, data->file_name, data->file_name_len)))
 	break;
 
-      chdir_do (data->change_dir);
+      chdir_do (data->change_dir, false);
 
       if (check_for_renamed_directories)
 	{
@@ -1129,6 +1132,73 @@ safe_dir_mode (struct stat const *st)
 	  | (we_are_root ? 0 : MODE_WXUSR));
 }
 
+/* Trimmed version of extract_dir, to create a dir that is not in the
+   archive, including parents.  Should behave like extract_dir when
+   NO_OVERWRITE_DIR_OLD_FILES is set in order to avoid changing existing
+   paths if they are in the way.
+*/
+bool
+create_dir (char const *file_name)
+{
+  int status;
+  mode_t mode;
+  bool interdir_made = false;
+  /* exists only to avoid passing a const pointer to make_directories */
+  char *unconst_file_name;
+
+  mode = MODE_RWX & ~ newdir_umask;
+
+  for (;;)
+    {
+      struct fdbase f = fdbase (file_name);
+      status = f.fd == BADFD ? -1 : mkdirat (f.fd, f.base, mode);
+      if (status == 0)
+	{
+	  return true;
+	}
+
+      if (errno == EEXIST)
+	{
+	  struct stat st;
+	  st.st_mode = 0;
+
+	  if (is_directory_link (file_name, &st))
+	    return true;
+
+	  if ((st.st_mode != 0 && fstatat_flags == 0)
+	      || deref_stat (file_name, &st) == 0)
+	    {
+	      if (S_ISDIR (st.st_mode))
+		{
+		  return true;
+		}
+	    }
+	  errno = EEXIST;
+	  break;
+	}
+      else if (errno != ENOENT || interdir_made)
+	{
+	  /* The error is not due to missing parent, or we already
+	     tried to make the parent directories and succeeded, so
+	     there must be another problem. No point in retrying. */
+	  break;
+	}
+      unconst_file_name = xstrdup (file_name);
+      if (make_directories (unconst_file_name, &interdir_made) == 0)
+	{
+	  free (unconst_file_name);
+	  continue;
+	}
+      else
+	{
+	  free (unconst_file_name);
+	  break;
+	}
+    }
+  mkdir_error (file_name);
+  return false;
+}
+
 /* Extractor functions for various member types */
 
 static bool
@@ -1895,7 +1965,7 @@ extract_archive (void)
     {
       idx_t dir = chdir_current;
       apply_nonancestor_delayed_set_stat (current_stat_info.file_name, false);
-      chdir_do (dir);
+      chdir_do (dir, false);
     }
 
   /* Take a safety backup of a previously existing file.  */
@@ -1916,7 +1986,16 @@ extract_archive (void)
 
   tar_extractor_t fun = prepare_to_extract (current_stat_info.file_name,
 					    typeflag);
-  bool ok = fun && fun (current_stat_info.file_name, typeflag);
+  bool ok = false;
+  if (fun)
+    {
+      /* Create one_top_level dir if it does not exist.  */
+      one_top_level_prepare = true;
+      chdir_do (chdir_current, !!one_top_level_dir);
+      one_top_level_prepare = false;
+      if (fun (current_stat_info.file_name, typeflag))
+	ok = true;
+    }
   skip_member ();
   if (!ok && backup_option)
     undo_last_backup ();
@@ -1928,7 +2007,7 @@ apply_delayed_link (struct delayed_link *ds)
 {
   char const *valid_source = NULL;
 
-  chdir_do (ds->change_dir);
+  chdir_do (ds->change_dir, false);
 
   for (struct string_list *sources = ds->sources;
        sources;
diff --git a/src/list.c b/src/list.c
index 5ab6de42..277468b9 100644
--- a/src/list.c
+++ b/src/list.c
@@ -136,16 +136,28 @@ enforce_one_top_level (char **pfile_name)
       idx_t pos = strlen (one_top_level_dir);
       if (strncmp (p, one_top_level_dir, pos) == 0)
 	{
-	  if (ISSLASH (p[pos]) || p[pos] == 0)
-	    return;
+	  /* Remove the one_top_level_dir prefix if it ends at
+	     component boundary.  */
+	  if (ISSLASH (p[pos]))
+	    {
+	      *pfile_name = xstrdup (p[pos+1] ? &p[pos+1] : ".");
+	      free (file_name);
+	      return;
+	    }
+	  else if (p[pos] == 0)
+	    {
+	      *pfile_name = xstrdup (".");
+	      free (file_name);
+	      return;
+	    }
 	}
-
-      *pfile_name = make_file_name (one_top_level_dir, file_name);
-      normalize_filename_x (*pfile_name);
+      /* If the prefix does not match, do nothing.  */
     }
   else
-    *pfile_name = xstrdup (one_top_level_dir);
-  free (file_name);
+    {
+      *pfile_name = xstrdup (".");
+      free (file_name);
+    }
 }
 
 bool
@@ -171,7 +183,14 @@ transform_stat_info (char typeflag, struct tar_stat_info *stat_info)
     }
 
   if (one_top_level_dir)
-    enforce_one_top_level (&stat_info->file_name);
+    {
+      enforce_one_top_level (&stat_info->file_name);
+      /* Hard links are interpreted relative to cwd, and --one-top-level
+	 works by means of a hidden change of cwd to the requested directory.
+	 Adjust hard link targets as well.  */
+      if (typeflag == LNKTYPE)
+	enforce_one_top_level (&stat_info->link_name);
+    }
   return true;
 }
 
diff --git a/src/misc.c b/src/misc.c
index e7b13719..04a33d18 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -24,6 +24,7 @@
 #include <xgetcwd.h>
 #include <unlinkdir.h>
 #include <utimens.h>
+#include <assert.h>
 
 #ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT
 # define DOUBLE_SLASH_IS_DISTINCT_ROOT 0
@@ -969,6 +970,7 @@ struct wd
      to be used.  */
   int fd;
 
+  bool one_top_level;
   /* If ID.err is zero, the directory's identity;
      if positive, a failure indication with errno = ID.err;
      if negative, no attempt has been made yet to get the identity.  */
@@ -1000,7 +1002,17 @@ static idx_t wdcache_count;
 idx_t
 chdir_count (void)
 {
-  return wd_count - !!wd_count;
+  idx_t count = 0;
+  if (wd_count)
+    {
+      /* Do not count the initial CWD entry -> start at 1.  */
+      for (idx_t i = 1; i < wd_count; i++)
+	{
+	  if (! wd[i].one_top_level)
+	    count++;
+	}
+    }
+  return count;
 }
 
 /* Grow the WD table by at least one entry.  */
@@ -1015,15 +1027,27 @@ grow_wd (void)
       wd[wd_count].abspath = NULL;
       wd[wd_count].fd = AT_FDCWD;
       wd[wd_count].id.err = -1;
+      wd[wd_count].one_top_level = false;
       wd_count++;
+      if (one_top_level_dir)
+	{
+	  wd[wd_count].name = one_top_level_dir;
+	  wd[wd_count].abspath = NULL;
+	  wd[wd_count].fd = 0;
+	  wd[wd_count].id.err = -1;
+	  wd[wd_count].one_top_level = true;
+	  wd_count++;
+	}
     }
 }
 
 /* DIR is the operand of a -C option; add it to vector of chdir targets,
    and return the index of its location.  */
 idx_t
-chdir_arg (char const *dir)
+chdir_arg (char const *dir, bool one_top_level)
 {
+  if (one_top_level)
+    chdir_arg (dir, false);
   if (wd_count == wd_alloc)
     grow_wd ();
 
@@ -1033,13 +1057,22 @@ chdir_arg (char const *dir)
     {
       dir += dotslashlen (dir);
       if (! dir[dir[0] == '.'])
-	return wd_count - 1;
+	{
+	  if (wd[wd_count - 1].one_top_level == one_top_level)
+	    return wd_count - 1;
+	  else
+	    return wd_count - 2;
+	}
     }
 
+  if (one_top_level)
+    dir = one_top_level_dir;
+
   wd[wd_count].name = dir;
   wd[wd_count].abspath = NULL;
   wd[wd_count].fd = 0;
   wd[wd_count].id.err = -1;
+  wd[wd_count].one_top_level = one_top_level;
   return wd_count++;
 }
 
@@ -1058,21 +1091,63 @@ static int chdir_fd = AT_FDCWD;
    working directory; otherwise, I must be a value returned by
    chdir_arg.  */
 void
-chdir_do (idx_t i)
+chdir_do (idx_t i, bool create)
 {
-  if (chdir_current != i)
-    {
-      struct wd *curr = &wd[i];
-      int fd = curr->fd;
+  struct wd *curr = &wd[i];
+  int fd = curr->fd;
+
+  /* Nothing to create unless we are at the one_top_level dir that has
+     not been created yet.  */
+  create = create && curr->one_top_level && (fd == BADFD || fd == 0);
 
-      if (! fd)
+  if (chdir_current != i || create)
+    {
+      if (! fd || create)
 	{
 	  if (! IS_ABSOLUTE_FILE_NAME (curr->name))
-	    chdir_do (i - 1);
+	    {
+	      idx_t j = i - 1;
+	      if (wd[j].one_top_level)
+		{
+		  j--;
+		  assert (! wd[j].one_top_level);
+		}
+	      chdir_do (j, false);
+	    }
 	  fd = openat (chdir_fd, curr->name,
 		       open_searchdir_how.flags & ~O_NOFOLLOW);
 	  if (fd < 0)
-	    open_fatal (curr->name);
+	    {
+	      if (create)
+		{
+		  struct open_how saved_open_searchdir_how = open_searchdir_how;
+		  /* Don't use O_BENEATH during creation of the
+		     directory. The one-top-level directory is
+		     allowed to be given as an absolute path.  */
+		  open_searchdir_how.resolve = 0;
+		  if (create_dir (curr->name))
+		    /* Directory created, retry */
+		    fd = openat (chdir_fd, curr->name,
+				 open_searchdir_how.flags & ~O_NOFOLLOW);
+		  open_searchdir_how = saved_open_searchdir_how;
+		  /* Either the creation or open failed */
+		  if (fd < 0)
+		    open_fatal (curr->name);
+		}
+	      else if (errno == ENOENT && curr->one_top_level)
+		{
+		  /* We are requested to not create the directory now. Mark it
+		     as to be created later when called with create == true. */
+		  chdir_fd = curr->fd = BADFD;
+		  chdir_current = i;
+		  /* Do not add it to the cache */
+		  return;
+		}
+	      else
+		{
+		  open_fatal (curr->name);
+		}
+	    }
 
 	  curr->fd = fd;
 
@@ -1090,7 +1165,7 @@ chdir_do (idx_t i)
 	    }
 	}
 
-      if (0 < fd)
+      if (0 < fd && /* no assumption about sign of BADFD */ fd != BADFD)
 	{
 	  /* Move the i value to the front of the cache.  This is
 	     O(CHDIR_CACHE_SIZE), but the cache is small.  */
@@ -1194,6 +1269,14 @@ fdbase_opendir (char const *file_name, bool alternate)
 {
   char const *name = file_name;
 
+  if (chdir_fd == BADFD && ! IS_ABSOLUTE_FILE_NAME (file_name))
+    {
+      /* BADFD is a sentinel value meaning that the chdir directory
+	 needs to be created lazily, therefore if we encounter it, the
+	 directory does not exist yet. */
+      errno = ENOENT;
+      return (struct fdbase) { .fd = chdir_fd, .base = name };
+    }
   /* Skip past leading "./"s,
      but not past the last "./" if that ends the name.  */
   idx_t dslen = dotslashlen (name);
@@ -1323,12 +1406,13 @@ tar_getcdpath (idx_t idx)
   if (!wd[idx].abspath)
     {
       idx_t save_cwdi = chdir_current, i = idx;
-      while (0 < i && !wd[i - 1].abspath)
+      while (0 < i && (!wd[i - 1].abspath || wd[i - 1].one_top_level))
 	i--;
 
       for (; i <= idx; i++)
 	{
-	  chdir_do (i);
+	  if (!wd[i].one_top_level)
+	    chdir_do (i, false);
 	  if (i == 0)
 	    {
 	      if ((wd[i].abspath = xgetcwd ()) == NULL)
@@ -1341,13 +1425,18 @@ tar_getcdpath (idx_t idx)
 	    wd[i].abspath = xstrdup (wd[i].name);
 	  else
 	    {
-	      namebuf_t nbuf = namebuf_create (wd[i - 1].abspath);
+	      idx_t j = i - 1;
+	      if (wd[j].one_top_level)
+		{
+		  j--;
+		  assert (! wd[j].one_top_level);
+		}
+	      namebuf_t nbuf = namebuf_create (wd[j].abspath);
 	      namebuf_add_dir (nbuf, wd[i].name);
 	      wd[i].abspath = namebuf_finish (nbuf);
 	    }
 	}
-
-      chdir_do (save_cwdi);
+      chdir_do (save_cwdi, false);
     }
 
   return wd[idx].abspath;
diff --git a/src/names.c b/src/names.c
index 1b8131c5..2ea6d7ff 100644
--- a/src/names.c
+++ b/src/names.c
@@ -875,6 +875,7 @@ static idx_t name_buffer_length; /* allocated length of name_buffer */
 void
 name_init (void)
 {
+  chdir_do (chdir_arg (".", !!one_top_level_dir), false);
   name_list_adjust ();
 }
 
@@ -1118,7 +1119,8 @@ name_next_elt (bool change_dirs)
 	case NELT_CHDIR:
 	  if (change_dirs)
 	    {
-	      chdir_do (chdir_arg (xstrdup (ep->v.name)));
+	      chdir_do (chdir_arg (xstrdup (ep->v.name), !!one_top_level_dir),
+			false);
 	      name_list_advance ();
 	      break;
 	    }
@@ -1181,7 +1183,7 @@ name_gather (void)
       static idx_t change_dir;
 
       while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
-	change_dir = chdir_arg (xstrdup (ep->v.name));
+	change_dir = chdir_arg (xstrdup (ep->v.name), !!one_top_level_dir);
 
       if (ep)
 	{
@@ -1210,7 +1212,7 @@ name_gather (void)
 	{
 	  idx_t change_dir0 = change_dir;
 	  while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
-	    change_dir = chdir_arg (xstrdup (ep->v.name));
+	    change_dir = chdir_arg (xstrdup (ep->v.name), !!one_top_level_dir);
 
 	  if (ep)
 	    addname (ep->v.name, change_dir, true, NULL);
@@ -1339,7 +1341,7 @@ name_match (const char *file_name)
 
       if (cursor->name[0] == 0)
 	{
-	  chdir_do (cursor->change_dir);
+	  chdir_do (cursor->change_dir, false);
 	  namelist = NULL;
 	  nametail = NULL;
 	  return true;
@@ -1383,7 +1385,7 @@ name_match (const char *file_name)
 	    return false;
 
 	  /* We got a match. */
-	  chdir_do (found->change_dir);
+	  chdir_do (found->change_dir, false);
 	  return true;
 	}
 
@@ -1785,7 +1787,7 @@ collect_and_sort_names (void)
 	/* NOTE: EXCLUDE_ANCHORED is not relevant here */
 	/* FIXME: just skip regexps for now */
 	continue;
-      chdir_do (name->change_dir);
+      chdir_do (name->change_dir, false);
 
       if (name->name[0] == 0)
 	continue;
@@ -1931,7 +1933,7 @@ name_from_list (void)
     {
       if (!gnu_list_name->is_wildcard)
 	gnu_list_name->found_count++;
-      chdir_do (gnu_list_name->change_dir);
+      chdir_do (gnu_list_name->change_dir, false);
       return gnu_list_name;
     }
   return NULL;
diff --git a/src/tar.c b/src/tar.c
index 57609d96..58856eb9 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -2683,10 +2683,7 @@ decode_options (int argc, char **argv)
 			"please set it explicitly with --one-top-level=DIR"));
 	}
 
-      if (one_top_level_dir
-	  && ! (*one_top_level_dir
-		&& IS_RELATIVE_FILE_NAME (one_top_level_dir)))
-	paxusage(_("--one-top-level=DIR must use a relative file name"));
+      normalize_filename_x (one_top_level_dir);
     }
 
   /* If ready to unlink hierarchies, so we are for simpler files.  */
diff --git a/src/unlink.c b/src/unlink.c
index 58187415..d809ad44 100644
--- a/src/unlink.c
+++ b/src/unlink.c
@@ -92,7 +92,7 @@ flush_deferred_unlinks (bool force)
       if (force
 	  || p->records_written < records_written)
 	{
-	  chdir_do (p->dir_idx);
+	  chdir_do (p->dir_idx, false);
 	  if (p->is_dir)
 	    {
 	      const char *fname;
@@ -163,11 +163,11 @@ flush_deferred_unlinks (bool force)
 	  struct deferred_unlink *next = p->next;
 	  const char *fname;
 
-	  chdir_do (p->dir_idx);
+	  chdir_do (p->dir_idx, false);
 	  if (p->dir_idx && is_cwd (p))
 	    {
 	      fname = tar_dirname ();
-	      chdir_do (p->dir_idx - 1);
+	      chdir_do (p->dir_idx - 1, false);
 	    }
 	  else
 	    fname = p->file_name;
@@ -184,7 +184,7 @@ flush_deferred_unlinks (bool force)
       dunlink_head = dunlink_tail = NULL;
     }
 
-  chdir_do (saved_chdir);
+  chdir_do (saved_chdir, false);
 }
 
 void
diff --git a/src/update.c b/src/update.c
index 872e701f..fa4883b0 100644
--- a/src/update.c
+++ b/src/update.c
@@ -133,7 +133,7 @@ update_archive (void)
 	      {
 		struct stat s;
 
-		chdir_do (name->change_dir);
+		chdir_do (name->change_dir, false);
 		if (deref_stat (current_stat_info.file_name, &s) == 0)
 		  {
 		    if (S_ISDIR (s.st_mode))
-- 
2.54.0

Reply via email to