Hello Paul,

Thank you for all your advice. I made a fix that once again allows
absolute paths to --one-top-level and confines extraction to the
--one-top-level directory (instead of to the current or -C directory).

The patch leverages the existing -C code. In order to do that, every
entry in the wd[] table gets another companion entry in the table that
represents the --one-top-level directory. There is one additional field
in each entry that allows skipping the companion entries if they are not
desired.

The actual directory is created lazily by chdir_do() if needed, as you
requested, to avoid empty "a/foo" after --one-top-level=foo -C a -C b.

As a side effect, the patch also fixes extraction of hard links with
--one-top-level, which is currently broken in the typical case (the
transform is not applied to the link target, so the hard link is wrong).

The patch does not yet handle the --show-transformed case with
--one-top-level that you discussed in another subthread. As a result, two
tests now fail (onetop02.at and onetop04.at). I suppose that this would
be quite easy to fix. 

Another open question is whether I should call
repair_delayed_set_stat and/or delay_set_stat on the newly created
directories, like extract_dir() does (the whole delay_set code is a bit
mysterious to me).

Use of --create together with --one-top-level should probably be
forbidden, as unlink.c uses wd[] in a way that will likely break in the
presence of companion entries (the chdir_do call in
flush_deferred_unlinks).

Other than that, I believe that the patch is fairly complete, although
of course it needs to be better commented and documentation updated.
Please have a look.

Best regards, Pavel
diff --git a/src/common.h b/src/common.h
index 032c0a1f..6a197d6b 100644
--- a/src/common.h
+++ b/src/common.h
@@ -559,6 +559,7 @@ void verify_volume (void);
 extern dev_t root_device;
 
 void extr_init (void);
+bool create_dir (char const *file_name);
 void extract_archive (void);
 void extract_finish (void);
 bool rename_directory (char *src, char *dst);
@@ -765,8 +766,8 @@ idx_t blocking_write (int fd, void const *buf, idx_t count);
 enum { BADFD = AT_FDCWD == -1 ? -2 : -1 };
 
 extern idx_t chdir_current;
-idx_t chdir_arg (char const *dir);
-void chdir_do (idx_t dir);
+idx_t chdir_arg (char const *dir, bool one_top_level);
+void chdir_do (idx_t dir, bool create);
 struct chdir_id { int err; dev_t st_dev; ino_t st_ino; } chdir_id (void);
 struct fdbase { int fd; char const *base; } fdbase (char const *);
 struct fdbase fdbase1 (char const *);
diff --git a/src/extract.c b/src/extract.c
index ab83a650..93a78b91 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -1049,7 +1049,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool metadata_set)
 	      && memeq (file_name, data->file_name, data->file_name_len)))
 	break;
 
-      chdir_do (data->change_dir);
+      chdir_do (data->change_dir, false);
 
       if (check_for_renamed_directories)
 	{
@@ -1129,6 +1129,73 @@ safe_dir_mode (struct stat const *st)
 	  | (we_are_root ? 0 : MODE_WXUSR));
 }
 
+/* Trimmed version of extract_dir, to create a dir that is not in the
+   archive, including parents.  Should behave like extract_dir when
+   NO_OVERWRITE_DIR_OLD_FILES is set in order to avoid changing existing
+   paths if they are in the way.
+*/
+bool
+create_dir (char const *file_name)
+{
+  int status;
+  mode_t mode;
+  bool interdir_made = false;
+  /* exists only to avoid passing a const pointer to make_directories */
+  char *unconst_file_name;
+
+  mode = MODE_RWX & ~ newdir_umask;
+
+  for (;;)
+    {
+      struct fdbase f = fdbase (file_name);
+      status = f.fd == BADFD ? -1 : mkdirat (f.fd, f.base, mode);
+      if (status == 0)
+	{
+	  return true;
+	}
+
+      if (errno == EEXIST)
+	{
+	  struct stat st;
+	  st.st_mode = 0;
+
+	  if (is_directory_link (file_name, &st))
+	    return true;
+
+	  if ((st.st_mode != 0 && fstatat_flags == 0)
+	      || deref_stat (file_name, &st) == 0)
+	    {
+	      if (S_ISDIR (st.st_mode))
+		{
+		  return true;
+		}
+	    }
+	  errno = EEXIST;
+	  break;
+	}
+      else if (errno != ENOENT || interdir_made)
+	{
+	  /* The error is not due to missing parent, or we already
+	     tried to make the parent directories and succeeded, so
+	     there must be another problem. No point in retrying. */
+	  break;
+	}
+      unconst_file_name = xstrdup (file_name);
+      if (make_directories (unconst_file_name, &interdir_made) == 0)
+	{
+	  free (unconst_file_name);
+	  continue;
+	}
+      else
+	{
+	  free (unconst_file_name);
+	  break;
+	}
+    }
+  mkdir_error (file_name);
+  return false;
+}
+
 /* Extractor functions for various member types */
 
 static bool
@@ -1900,7 +1967,7 @@ extract_archive (void)
     {
       idx_t dir = chdir_current;
       apply_nonancestor_delayed_set_stat (current_stat_info.file_name, false);
-      chdir_do (dir);
+      chdir_do (dir, false);
     }
 
   /* Take a safety backup of a previously existing file.  */
@@ -1923,6 +1990,8 @@ extract_archive (void)
 					    typeflag);
   if (fun)
     {
+      /* create one_top_level dir if it does not exist */
+      chdir_do (chdir_current, one_top_level_option);
       if (fun (current_stat_info.file_name, typeflag))
 	return;
     }
@@ -1939,7 +2008,7 @@ apply_delayed_link (struct delayed_link *ds)
 {
   char const *valid_source = NULL;
 
-  chdir_do (ds->change_dir);
+  chdir_do (ds->change_dir, false);
 
   for (struct string_list *sources = ds->sources;
        sources;
diff --git a/src/list.c b/src/list.c
index d541cf26..6567a4a8 100644
--- a/src/list.c
+++ b/src/list.c
@@ -128,16 +128,27 @@ enforce_one_top_level (char **pfile_name)
       idx_t pos = strlen (one_top_level_dir);
       if (strncmp (p, one_top_level_dir, pos) == 0)
 	{
-	  if (ISSLASH (p[pos]) || p[pos] == 0)
-	    return;
+	  /* remove the one_top_level_dir prefix if it ends at component boundary. */
+	  if (ISSLASH (p[pos]))
+	    {
+	      *pfile_name = xstrdup (p[pos+1] ? &p[pos+1] : ".");
+	      free (file_name);
+	      return;
+	    }
+	  else if (p[pos] == 0)
+	    {
+	      *pfile_name = xstrdup (".");
+	      free (file_name);
+	      return;
+	    }
 	}
-
-      *pfile_name = make_file_name (one_top_level_dir, file_name);
-      normalize_filename_x (*pfile_name);
+      /* if the prefix does not match, do nothing */
     }
   else
-    *pfile_name = xstrdup (one_top_level_dir);
-  free (file_name);
+    {
+      *pfile_name = xstrdup (".");
+      free (file_name);
+    }
 }
 
 bool
@@ -163,7 +174,14 @@ transform_stat_info (char typeflag, struct tar_stat_info *stat_info)
     }
 
   if (one_top_level_option)
-    enforce_one_top_level (&stat_info->file_name);
+    {
+      enforce_one_top_level (&stat_info->file_name);
+      /* Hardlinks are interpreted relative to cwd, and --one-top-level
+       works by means of a hidden change of cwd to the requested directory.
+       Adjust hardlink targets as well. */
+      if (typeflag == LNKTYPE)
+	enforce_one_top_level (&stat_info->link_name);
+    }
   return true;
 }
 
diff --git a/src/misc.c b/src/misc.c
index 02dfbcb4..e6bf5a36 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -24,6 +24,7 @@
 #include <xgetcwd.h>
 #include <unlinkdir.h>
 #include <utimens.h>
+#include <assert.h>
 
 #ifndef DOUBLE_SLASH_IS_DISTINCT_ROOT
 # define DOUBLE_SLASH_IS_DISTINCT_ROOT 0
@@ -969,6 +970,7 @@ struct wd
      to be used.  */
   int fd;
 
+  bool one_top_level;
   /* If ID.err is zero, the directory's identity;
      if positive, a failure indication with errno = ID.err;
      if negative, no attempt has been made yet to get the identity.  */
@@ -1000,7 +1002,17 @@ static idx_t wdcache_count;
 idx_t
 chdir_count (void)
 {
-  return wd_count - !!wd_count;
+  idx_t count = 0;
+  if (wd_count)
+    {
+      /* Do not count the initial CWD entry -> start at 1. */
+      for (idx_t i = 1; i < wd_count; i++)
+	{
+	  if (! wd[i].one_top_level)
+	    count++;
+	}
+    }
+  return count;
 }
 
 /* Grow the WD table by at least one entry.  */
@@ -1015,15 +1027,27 @@ grow_wd (void)
       wd[wd_count].abspath = NULL;
       wd[wd_count].fd = AT_FDCWD;
       wd[wd_count].id.err = -1;
+      wd[wd_count].one_top_level = false;
       wd_count++;
+      if (one_top_level_option)
+	{
+	  wd[wd_count].name = one_top_level_dir;
+	  wd[wd_count].abspath = NULL;
+	  wd[wd_count].fd = 0;
+	  wd[wd_count].id.err = -1;
+	  wd[wd_count].one_top_level = true;
+	  wd_count++;
+	}
     }
 }
 
 /* DIR is the operand of a -C option; add it to vector of chdir targets,
    and return the index of its location.  */
 idx_t
-chdir_arg (char const *dir)
+chdir_arg (char const *dir, bool one_top_level)
 {
+  if (one_top_level)
+    chdir_arg (dir, false);
   if (wd_count == wd_alloc)
     grow_wd ();
 
@@ -1033,13 +1057,22 @@ chdir_arg (char const *dir)
     {
       dir += dotslashlen (dir);
       if (! dir[dir[0] == '.'])
-	return wd_count - 1;
+	{
+	  if (wd[wd_count - 1].one_top_level == one_top_level)
+	    return wd_count - 1;
+	  else
+	    return wd_count - 2;
+	}
     }
 
+  if (one_top_level)
+    dir = one_top_level_dir;
+
   wd[wd_count].name = dir;
   wd[wd_count].abspath = NULL;
   wd[wd_count].fd = 0;
   wd[wd_count].id.err = -1;
+  wd[wd_count].one_top_level = one_top_level;
   return wd_count++;
 }
 
@@ -1058,21 +1091,63 @@ static int chdir_fd = AT_FDCWD;
    working directory; otherwise, I must be a value returned by
    chdir_arg.  */
 void
-chdir_do (idx_t i)
+chdir_do (idx_t i, bool create)
 {
-  if (chdir_current != i)
-    {
-      struct wd *curr = &wd[i];
-      int fd = curr->fd;
+  struct wd *curr = &wd[i];
+  int fd = curr->fd;
+
+  /* nothing to create unless we are at the one_top_level dir that has not been
+   created yet */
+  create = create && curr->one_top_level && (fd == BADFD || fd == 0);
 
-      if (! fd)
+  if (chdir_current != i || create)
+    {
+      if (! fd || create)
 	{
 	  if (! IS_ABSOLUTE_FILE_NAME (curr->name))
-	    chdir_do (i - 1);
+	    {
+	      idx_t j = i - 1;
+	      if (wd[j].one_top_level)
+		{
+		  j--;
+		  assert (! wd[j].one_top_level);
+		}
+	      chdir_do (j, false);
+	    }
 	  fd = openat (chdir_fd, curr->name,
 		       open_searchdir_how.flags & ~O_NOFOLLOW);
 	  if (fd < 0)
-	    open_fatal (curr->name);
+	    {
+	      if (create)
+		{
+		  struct open_how saved_open_searchdir_how = open_searchdir_how;
+		  /* Don't use O_BENEATH during creation of the
+		     directory. The one-top-level directory is
+		     allowed to be given as an absolute path. */
+		  open_searchdir_how.resolve = 0;
+		  if (create_dir (curr->name))
+		    /* Directory created, retry */
+		    fd = openat (chdir_fd, curr->name,
+			     open_searchdir_how.flags & ~O_NOFOLLOW);
+		  open_searchdir_how = saved_open_searchdir_how;
+		  /* Either the creation or open failed */
+		  if (fd < 0)
+		    open_fatal (curr->name);
+		}
+	      else if (errno == ENOENT && curr->one_top_level)
+		{
+		  /* We are requested to not create the directory now. Mark it
+		     as to be created later when called with create == true. */
+		  chdir_fd = curr->fd = BADFD;
+		  chdir_current = i;
+		  /* Do not add it to the cache */
+		  return;
+		}
+	      else
+		{
+		  open_fatal (curr->name);
+		}
+	    }
 
 	  curr->fd = fd;
 
@@ -1090,7 +1165,7 @@ chdir_do (idx_t i)
 	    }
 	}
 
-      if (0 < fd)
+      if (0 < fd && /* no assumption about sign of BADFD */ fd != BADFD)
 	{
 	  /* Move the i value to the front of the cache.  This is
 	     O(CHDIR_CACHE_SIZE), but the cache is small.  */
@@ -1194,6 +1269,14 @@ fdbase_opendir (char const *file_name, bool alternate)
 {
   char const *name = file_name;
 
+  if (chdir_fd == BADFD && ! IS_ABSOLUTE_FILE_NAME (file_name))
+    {
+      /* BADFD is a sentinel value meaning that the chdir directory
+	 needs to be created lazily, therefore if we encounter it, the
+	 directory does not exist yet. */
+      errno = ENOENT;
+      return (struct fdbase) { .fd = chdir_fd, .base = name };
+    }
   /* Skip past leading "./"s,
      but not past the last "./" if that ends the name.  */
   idx_t dslen = dotslashlen (name);
@@ -1323,12 +1406,13 @@ tar_getcdpath (idx_t idx)
   if (!wd[idx].abspath)
     {
       idx_t save_cwdi = chdir_current, i = idx;
-      while (0 < i && !wd[i - 1].abspath)
+      while (0 < i && (!wd[i - 1].abspath || wd[i - 1].one_top_level))
 	i--;
 
       for (; i <= idx; i++)
 	{
-	  chdir_do (i);
+	  if (!wd[i].one_top_level)
+	    chdir_do (i, false);
 	  if (i == 0)
 	    {
 	      if ((wd[i].abspath = xgetcwd ()) == NULL)
@@ -1341,13 +1425,19 @@ tar_getcdpath (idx_t idx)
 	    wd[i].abspath = xstrdup (wd[i].name);
 	  else
 	    {
-	      namebuf_t nbuf = namebuf_create (wd[i - 1].abspath);
+	      idx_t j = i - 1;
+	      if (wd[j].one_top_level)
+		{
+		  j--;
+		  assert (! wd[j].one_top_level);
+		}
+	      namebuf_t nbuf = namebuf_create (wd[j].abspath);
 	      namebuf_add_dir (nbuf, wd[i].name);
 	      wd[i].abspath = namebuf_finish (nbuf);
 	    }
 	}
 
-      chdir_do (save_cwdi);
+      chdir_do (save_cwdi, false);
     }
 
   return wd[idx].abspath;
diff --git a/src/names.c b/src/names.c
index 1b8131c5..ea05b5f8 100644
--- a/src/names.c
+++ b/src/names.c
@@ -875,6 +875,7 @@ static idx_t name_buffer_length; /* allocated length of name_buffer */
 void
 name_init (void)
 {
+  chdir_do (chdir_arg (".", one_top_level_option), false);
   name_list_adjust ();
 }
 
@@ -1118,7 +1119,7 @@ name_next_elt (bool change_dirs)
 	case NELT_CHDIR:
 	  if (change_dirs)
 	    {
-	      chdir_do (chdir_arg (xstrdup (ep->v.name)));
+	      chdir_do (chdir_arg (xstrdup (ep->v.name), one_top_level_option), false);
 	      name_list_advance ();
 	      break;
 	    }
@@ -1181,7 +1182,7 @@ name_gather (void)
       static idx_t change_dir;
 
       while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
-	change_dir = chdir_arg (xstrdup (ep->v.name));
+	change_dir = chdir_arg (xstrdup (ep->v.name), one_top_level_option);
 
       if (ep)
 	{
@@ -1210,7 +1211,7 @@ name_gather (void)
 	{
 	  idx_t change_dir0 = change_dir;
 	  while ((ep = name_next_elt (false)) && ep->type == NELT_CHDIR)
-	    change_dir = chdir_arg (xstrdup (ep->v.name));
+	    change_dir = chdir_arg (xstrdup (ep->v.name), one_top_level_option);
 
 	  if (ep)
 	    addname (ep->v.name, change_dir, true, NULL);
@@ -1339,7 +1340,7 @@ name_match (const char *file_name)
 
       if (cursor->name[0] == 0)
 	{
-	  chdir_do (cursor->change_dir);
+	  chdir_do (cursor->change_dir, false);
 	  namelist = NULL;
 	  nametail = NULL;
 	  return true;
@@ -1383,7 +1384,7 @@ name_match (const char *file_name)
 	    return false;
 
 	  /* We got a match. */
-	  chdir_do (found->change_dir);
+	  chdir_do (found->change_dir, false);
 	  return true;
 	}
 
@@ -1785,7 +1786,7 @@ collect_and_sort_names (void)
 	/* NOTE: EXCLUDE_ANCHORED is not relevant here */
 	/* FIXME: just skip regexps for now */
 	continue;
-      chdir_do (name->change_dir);
+      chdir_do (name->change_dir, false);
 
       if (name->name[0] == 0)
 	continue;
@@ -1931,7 +1932,7 @@ name_from_list (void)
     {
       if (!gnu_list_name->is_wildcard)
 	gnu_list_name->found_count++;
-      chdir_do (gnu_list_name->change_dir);
+      chdir_do (gnu_list_name->change_dir, false);
       return gnu_list_name;
     }
   return NULL;
diff --git a/src/tar.c b/src/tar.c
index 9376b59b..13f678e4 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -2688,8 +2688,7 @@ decode_options (int argc, char **argv)
 			"please set it explicitly with --one-top-level=DIR"));
 	}
 
-      if (one_top_level_dir && !IS_RELATIVE_FILE_NAME (one_top_level_dir))
-	paxusage(_("--one-top-level=DIR must use a relative file name"));
+      normalize_filename_x (one_top_level_dir);
     }
 
   /* If ready to unlink hierarchies, so we are for simpler files.  */
diff --git a/src/unlink.c b/src/unlink.c
index 58187415..d809ad44 100644
--- a/src/unlink.c
+++ b/src/unlink.c
@@ -92,7 +92,7 @@ flush_deferred_unlinks (bool force)
       if (force
 	  || p->records_written < records_written)
 	{
-	  chdir_do (p->dir_idx);
+	  chdir_do (p->dir_idx, false);
 	  if (p->is_dir)
 	    {
 	      const char *fname;
@@ -163,11 +163,11 @@ flush_deferred_unlinks (bool force)
 	  struct deferred_unlink *next = p->next;
 	  const char *fname;
 
-	  chdir_do (p->dir_idx);
+	  chdir_do (p->dir_idx, false);
 	  if (p->dir_idx && is_cwd (p))
 	    {
 	      fname = tar_dirname ();
-	      chdir_do (p->dir_idx - 1);
+	      chdir_do (p->dir_idx - 1, false);
 	    }
 	  else
 	    fname = p->file_name;
@@ -184,7 +184,7 @@ flush_deferred_unlinks (bool force)
       dunlink_head = dunlink_tail = NULL;
     }
 
-  chdir_do (saved_chdir);
+  chdir_do (saved_chdir, false);
 }
 
 void
diff --git a/src/update.c b/src/update.c
index 872e701f..fa4883b0 100644
--- a/src/update.c
+++ b/src/update.c
@@ -133,7 +133,7 @@ update_archive (void)
 	      {
 		struct stat s;
 
-		chdir_do (name->change_dir);
+		chdir_do (name->change_dir, false);
 		if (deref_stat (current_stat_info.file_name, &s) == 0)
 		  {
 		    if (S_ISDIR (s.st_mode))

Reply via email to