From: Harald Hoyer <har...@redhat.com> Having the same files and directories in different locations results in different archives, because the inode and device numbers are not the same.
The "--reproducible" flag will assign increasing inode numbers to the files, resulting in equal archives for equal files and directories. A hash table is used to find already assigned inode numbers for linked files. --- V2: Added documentation. doc/cpio.1 | 6 +++++- doc/cpio.texi | 6 ++++++ src/copyout.c | 13 ++++++++++++- src/extern.h | 5 +++++ src/global.c | 3 +++ src/main.c | 14 ++++++++++++-- src/util.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 103 insertions(+), 4 deletions(-) diff --git a/doc/cpio.1 b/doc/cpio.1 index d48d2df..8bc2f3c 100644 --- a/doc/cpio.1 +++ b/doc/cpio.1 @@ -13,7 +13,7 @@ .\" .\" You should have received a copy of the GNU General Public License .\" along with GNU cpio. If not, see <http://www.gnu.org/licenses/>. -.TH CPIO 1 "January 30, 2014" "CPIO" "GNU CPIO" +.TH CPIO 1 "August 15, 2014" "CPIO" "GNU CPIO" .SH NAME cpio \- copy files to and from archives .SH SYNOPSIS @@ -29,6 +29,7 @@ cpio \- copy files to and from archives [\fB\-\-block\-size=\fIblocks\fR] [\fB\-\-dereference\fR] [\fB\-\-io\-size=\fIBYTES\fR] [\fB\-\-quiet\fR] [\fB\-\-force\-local\fR] [\fB\-\-rsh\-command=\fICOMMAND\fR] +[\fB\-\-reproducible\fR] < \fIname-list\fR [\fB>\fR \fIarchive\fR] .B cpio @@ -269,6 +270,9 @@ archive. The output archive name can be specified wither using this option, or using \fB\-F\fR (\fB\-\-file\fR), but not both. +.TP +.B \-\-reproducible +Create reproducible inode numbers. .SS Operation modifiers valid only in copy-pass mode .TP .BR \-l ", " \-\-link diff --git a/doc/cpio.texi b/doc/cpio.texi index c1cf11b..66c4a64 100644 --- a/doc/cpio.texi +++ b/doc/cpio.texi @@ -290,6 +290,8 @@ Print @var{string} when the end of a volume of the backup media is reached. @item --quiet Do not print the number of blocks copied. +@item --reproducible +Create reproducible inode numbers. @item --rsh-command=@var{command} Use @var{command} instead of @command{rsh} to access remote archives. 
@item -R @@ -840,6 +842,10 @@ If the group is omitted but the @samp{:} or @samp{.} separator is given, as in the second example. the given user's login group will be used. +@item --reproducible +[@ref{copy-out}] +@*Create reproducible inode numbers. + @item --rsh-command=@var{command} [@ref{copy-in},@ref{copy-out},@ref{copy-pass}] @*Notifies cpio that is should use @var{command} to communicate with remote diff --git a/src/copyout.c b/src/copyout.c index 673dad4..99af26d 100644 --- a/src/copyout.c +++ b/src/copyout.c @@ -640,7 +640,18 @@ process_copy_out () { /* Set values in output header. */ stat_to_cpio (&file_hdr, &file_stat); - + + if (reproducible_option) + { + file_hdr.c_ino = reproducible_inode_get(file_hdr.c_ino, + file_hdr.c_dev_maj, + file_hdr.c_dev_min, + file_hdr.c_nlink); + file_hdr.c_dev_maj = 0; + file_hdr.c_dev_min = 0; + } + + if (archive_format == arf_tar || archive_format == arf_ustar) { if (file_hdr.c_mode & CP_IFDIR) diff --git a/src/extern.h b/src/extern.h index 92117cd..3347ee4 100644 --- a/src/extern.h +++ b/src/extern.h @@ -64,6 +64,7 @@ extern mode_t newdir_umask; #define CPIO_WARN_ALL (unsigned int)-1 extern bool to_stdout_option; +extern bool reproducible_option; extern int last_header_start; extern int copy_matching_files; @@ -169,6 +170,10 @@ void warn_if_file_changed (char *file_name, off_t old_file_size, time_t old_file_mtime); void create_all_directories (char *name); void prepare_append (int out_file_des); +ino_t reproducible_inode_get (ino_t node_num, + unsigned long major_num, + unsigned long minor_num, + size_t nlink); char *find_inode_file (ino_t node_num, unsigned long major_num, unsigned long minor_num); void add_inode (ino_t node_num, char *file_name, diff --git a/src/global.c b/src/global.c index c699f6e..5519c15 100644 --- a/src/global.c +++ b/src/global.c @@ -184,6 +184,9 @@ unsigned int warn_option = 0; /* Extract to standard output? 
*/ bool to_stdout_option = false; +/* Assign reproducible inode numbers */ +bool reproducible_option = false; + /* The name this program was run with. */ char *program_name; diff --git a/src/main.c b/src/main.c index e1f2c5c..6a61ca8 100644 --- a/src/main.c +++ b/src/main.c @@ -58,7 +58,8 @@ enum cpio_options { FORCE_LOCAL_OPTION, DEBUG_OPTION, BLOCK_SIZE_OPTION, - TO_STDOUT_OPTION + TO_STDOUT_OPTION, + REPRODUCIBLE_OPTION }; const char *program_authors[] = @@ -178,6 +179,8 @@ static struct argp_option options[] = { GRID+1 }, {"to-stdout", TO_STDOUT_OPTION, NULL, 0, N_("Extract files to standard output"), GRID+1 }, + {"reproducible", REPRODUCIBLE_OPTION, NULL, 0, + N_("Create reproducible inode numbers"), GRID+1 }, {NULL, 'I', N_("[[USER@]HOST:]FILE-NAME"), 0, N_("Archive filename to use instead of standard input. Optional USER and HOST specify the user and host names in case of a remote archive"), GRID+1 }, #undef GRID @@ -529,6 +532,10 @@ crc newc odc bin ustar tar (all-caps also recognized)"), arg)); to_stdout_option = true; break; + case REPRODUCIBLE_OPTION: + reproducible_option = true; + break; + default: return ARGP_ERR_UNKNOWN; } @@ -592,6 +599,8 @@ process_args (int argc, char *argv[]) CHECK_USAGE (xstat != lstat, "--dereference", "--extract"); CHECK_USAGE (append_flag, "--append", "--extract"); CHECK_USAGE (output_archive_name, "-O", "--extract"); + CHECK_USAGE (reproducible_option, "--reproducible", "--extract"); + if (to_stdout_option) { CHECK_USAGE (create_dir_flag, "--make-directories", "--to-stdout"); @@ -675,7 +684,8 @@ process_args (int argc, char *argv[]) CHECK_USAGE (no_abs_paths_flag, "--absolute-pathnames", "--pass-through"); CHECK_USAGE (to_stdout_option, "--to-stdout", "--pass-through"); - + CHECK_USAGE (reproducible_option, "--reproducible", "--pass-through"); + directory_name = argv[index]; } diff --git a/src/util.c b/src/util.c index 18b3e42..c8ff9d2 100644 --- a/src/util.c +++ b/src/util.c @@ -683,6 +683,66 @@ prepare_append (int 
out_file_des) in_buff = input_buffer; } +static ino_t reproducible_inode_counter = 0; + +struct reproducible_inode +{ + ino_t inode; + unsigned long major_num; + unsigned long minor_num; + ino_t reproducible_inode; +}; + +static Hash_table *reproducible_hash_table = NULL; + +static size_t +reproducible_inode_hasher (const void *val, size_t n_buckets) +{ + const struct reproducible_inode *ival = val; + return ival->inode % n_buckets; +} + +static bool +reproducible_inode_compare (const void *val1, const void *val2) +{ + const struct reproducible_inode *ival1 = val1; + const struct reproducible_inode *ival2 = val2; + return ival1->inode == ival2->inode + && ival1->major_num == ival2->major_num + && ival1->minor_num == ival2->minor_num; +} + +ino_t +reproducible_inode_get (ino_t node_num, unsigned long major_num, + unsigned long minor_num, size_t nlink) +{ + struct reproducible_inode *sample; + struct reproducible_inode *ival = NULL; + + if(nlink <= 1) + return ++reproducible_inode_counter; + + /* Create new inode record. */ + sample = (struct reproducible_inode *) xmalloc (sizeof (struct reproducible_inode)); + + sample->inode = node_num; + sample->major_num = major_num; + sample->minor_num = minor_num; + + if (reproducible_hash_table) + ival = hash_lookup (reproducible_hash_table, sample); + + if (ival == NULL) { + sample->reproducible_inode = ++reproducible_inode_counter; + if (!((reproducible_hash_table + || (reproducible_hash_table = hash_initialize (0, 0, reproducible_inode_hasher, + reproducible_inode_compare, 0))) + && (ival = hash_insert (reproducible_hash_table, sample)))) + xalloc_die (); + } + return ival->reproducible_inode; +} + /* Support for remembering inodes with multiple links. Used in the "copy in" and "copy pass" modes for making links instead of copying the file. */ -- 2.0.4