Warnings and workarounds concerning tarbombs (archives not storing their contents within a single directory) have pervaded the free software community for years. However, GNU tar still does not have an option to deal with them. This implements a request made on the official website in 2007. During extraction, the new option conditionally creates a directory derived from the basename of the archive, falling back to the usual method if the directory already exists.
Signed-off-by: Connor Behan <connor.be...@gmail.com> --- doc/tar.texi | 14 ++++++++++ src/common.h | 3 +++ src/extract.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/tar.c | 11 ++++++++ 4 files changed, 115 insertions(+) diff --git a/doc/tar.texi b/doc/tar.texi index 2661174..518602f 100644 --- a/doc/tar.texi +++ b/doc/tar.texi @@ -2795,6 +2795,20 @@ at the end of each tape. If it exits with nonzero status, @command{tar} fails immediately. @xref{info-script}, for a detailed discussion of this feature. +@opsummary{intelligent-subdir} +@item --intelligent-subdir + +Tells @command{tar} to extract files into a newly created directory if an +extraction would otherwise place more than one file in the archive's +parent directory. This guards against so-called tarbombs. The name of the +new directory is a substring of the basename of the file from the +beginning up to and not including the last occurrence of a known three +letter extension: @samp{.tar}, @samp{.taz}, @samp{.tbz}, @samp{.tb2}, +@samp{.tgz}, @samp{.tlz} or @samp{.txz}. For example, offending files +@file{foo.tar}, @file{foo.tar.gz} and @file{foo.tbz2} would be extracted +into @file{foo} while @file{foo.tar.tar} would be extracted into +@file{foo.tar}. + @opsummary{interactive} @item --interactive @itemx --confirmation diff --git a/src/common.h b/src/common.h index 21df8c1..0370e37 100644 --- a/src/common.h +++ b/src/common.h @@ -173,6 +173,9 @@ GLOBAL bool incremental_option; /* Specified name of script to run at end of each tape change. 
*/ GLOBAL const char *info_script_option; +/* Ensure that all extracted files are inside a subdirectory */ +GLOBAL bool intelligent_subdir_option; + GLOBAL bool interactive_option; /* If nonzero, extract only Nth occurrence of each named file */ diff --git a/src/extract.c b/src/extract.c index 319aaa8..a80e557 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1559,6 +1559,89 @@ prepare_to_extract (char const *file_name, int typeflag, tar_extractor_t *fun) return 1; } +void +subdir_manipulation (char **file_name, int typeflag) +{ + static int toplevel_count = 0; + static char *toplevel_file; + static char *toplevel_prefix; + + if (toplevel_count < 2) + { + int i; + for (i = 0; i < strlen (*file_name); i++) + if (ISSLASH ((*file_name)[i])) break; + + if (i == strlen (*file_name)) toplevel_count++; + + if (toplevel_count == 1) + toplevel_file = strdup (*file_name); + else if (toplevel_count == 2) + { + char *base_prefix = base_name (archive_name_array[0]); + toplevel_prefix = xmalloc (strlen (base_prefix) + 2); + + /* Determine the name of the subdirectory to create. */ + for (i = strlen (base_prefix) - 1; i > 2; i--) + if (!strncmp (base_prefix + i - 3, ".tar", 4) || + !strncmp (base_prefix + i - 3, ".taz", 4) || + !strncmp (base_prefix + i - 3, ".tbz", 4) || + !strncmp (base_prefix + i - 3, ".tb2", 4) || + !strncmp (base_prefix + i - 3, ".tgz", 4) || + !strncmp (base_prefix + i - 3, ".tlz", 4) || + !strncmp (base_prefix + i - 3, ".txz", 4)) break; + + if (i == 2) + strcpy (toplevel_prefix, base_prefix); + else + { + strncpy (toplevel_prefix, base_prefix, i - 3); + toplevel_prefix[i - 3] = '\0'; + } + + strcat (toplevel_prefix, "/"); + int new_size = strlen (toplevel_prefix) + strlen (toplevel_file) + 1; + char *target = xmalloc (new_size); + + /* Move the one file that has been put in the wrong place. 
*/ + strcpy (target, toplevel_prefix); + strcat (target, toplevel_file); + + if (deref_stat (toplevel_prefix, NULL) != -1 || errno != ENOENT) + { + intelligent_subdir_option = false; + WARN ((0, 0, _("%s already exists, ignoring --intelligent-subdir"), + toplevel_prefix)); + } + else + { + errno = 0; + if (extract_dir (toplevel_prefix, typeflag) + || renameat (chdir_fd, toplevel_file, chdir_fd, target) < 0) + WARN ((0, 0, _("%s will be one level above the other archive contents"), + toplevel_file)); + } + + free (toplevel_file); + free (base_prefix); + free (target); + } + } + + if (toplevel_count >= 2) + { + char *raw_name = strdup (*file_name); + free (*file_name); + + int new_size = strlen (toplevel_prefix) + strlen (raw_name) + 1; + *file_name = xmalloc (new_size); + + strcpy (*file_name, toplevel_prefix); + strcat (*file_name, raw_name); + free (raw_name); + } +} + /* Extract a file from the archive. */ void extract_archive (void) @@ -1609,6 +1692,10 @@ extract_archive (void) typeflag = sparse_member_p (¤t_stat_info) ? GNUTYPE_SPARSE : current_header->header.typeflag; + /* Manipulate where this file should go if we are removing tarbombs. 
*/ + if (intelligent_subdir_option) + subdir_manipulation (¤t_stat_info.file_name, typeflag); + if (prepare_to_extract (current_stat_info.file_name, typeflag, &fun)) { if (fun && (*fun) (current_stat_info.file_name, typeflag) diff --git a/src/tar.c b/src/tar.c index c3c2459..d1a0b21 100644 --- a/src/tar.c +++ b/src/tar.c @@ -286,6 +286,7 @@ enum IGNORE_COMMAND_ERROR_OPTION, IGNORE_FAILED_READ_OPTION, INDEX_FILE_OPTION, + INTELLIGENT_SUBDIR_OPTION, KEEP_NEWER_FILES_OPTION, LEVEL_OPTION, LZIP_OPTION, @@ -484,6 +485,9 @@ static struct argp_option options[] = { {"overwrite-dir", OVERWRITE_DIR_OPTION, 0, 0, N_("overwrite metadata of existing directories when extracting (default)"), GRID+1 }, + {"intelligent-subdir", INTELLIGENT_SUBDIR_OPTION, 0, 0, + N_("create a subdirectory if the archive being extracted " + "has multiple files at the top level"), GRID+1 }, #undef GRID #define GRID 40 @@ -1406,6 +1410,10 @@ parse_opt (int key, char *arg, struct argp_state *state) ignore_zeros_option = true; break; + case INTELLIGENT_SUBDIR_OPTION: + intelligent_subdir_option = true; + break; + case 'j': set_use_compress_program_option (BZIP2_PROGRAM); break; @@ -2373,6 +2381,9 @@ decode_options (int argc, char **argv) subcommand_string (subcommand_option))); } + if (intelligent_subdir_option && absolute_names_option) + USAGE_ERROR ((0, 0, _("--intelligent-subdir cannot be used with --absolute-names"))); + if (archive_names == 0) { /* If no archive file name given, try TAPE from the environment, or -- 1.8.3.4