In perl.git, the branch maint-5.10 has been updated <http://perl5.git.perl.org/perl.git/commitdiff/fd0641b9e7839bf20fbb5628a96a68b04a624a89?hp=f0cd3a15af3180b43fa205a883038c03057a6a9b>
- Log ----------------------------------------------------------------- commit fd0641b9e7839bf20fbb5628a96a68b04a624a89 Author: Jos I. Boumans <k...@dwim.org> Date: Sat Dec 13 19:08:13 2008 +0100 Update Archive::Tar to 1.42 From: "Jos I. Boumans" <j...@dwim.org> Message-Id: <5b9b0070-0f59-4182-bf11-3a27487b1...@dwim.org> p4raw-id: //depot/p...@35099 (cherry picked from commit 2610e7a4309c5895eb0ce025e439914ec3b3f6c3) M MANIFEST M lib/Archive/Tar.pm M lib/Archive/Tar/Constant.pm M lib/Archive/Tar/t/02_methods.t A lib/Archive/Tar/t/99_pod.t M lib/Archive/Tar/t/src/linktest/linktest_missing_dir.tar.packed M lib/Archive/Tar/t/src/linktest/linktest_with_dir.tar.packed M lib/Archive/Tar/t/src/long/bar.tar.packed M lib/Archive/Tar/t/src/long/foo.tbz.packed M lib/Archive/Tar/t/src/long/foo.tgz.packed M lib/Archive/Tar/t/src/short/bar.tar.packed M lib/Archive/Tar/t/src/short/foo.tbz.packed M lib/Archive/Tar/t/src/short/foo.tgz.packed commit cf038d36b56493225070f6ea9bf3705a5476bb77 Author: Jos I. Boumans <k...@dwim.org> Date: Mon Oct 13 17:40:22 2008 +0200 Update Archive::Tar to 1.40 From: "Jos I. Boumans" <j...@dwim.org> Message-Id: <d694d518-2404-4476-b578-a5b95f896...@dwim.org> ...minus the Pod tests that we've been regularly removing. p4raw-id: //depot/p...@34486 (cherry picked from commit f56953582ae4af437649f099e3968dfe2c4718c9) M lib/Archive/Tar.pm M lib/Archive/Tar/File.pm M lib/Archive/Tar/t/02_methods.t M lib/Archive/Tar/t/04_resolved_issues.t M lib/Archive/Tar/t/src/linktest/linktest_missing_dir.tar.packed M lib/Archive/Tar/t/src/linktest/linktest_with_dir.tar.packed M lib/Archive/Tar/t/src/long/bar.tar.packed M lib/Archive/Tar/t/src/long/foo.tbz.packed M lib/Archive/Tar/t/src/long/foo.tgz.packed M lib/Archive/Tar/t/src/short/bar.tar.packed M lib/Archive/Tar/t/src/short/foo.tbz.packed M lib/Archive/Tar/t/src/short/foo.tgz.packed commit de13a8c27cddc174f3d65f77276984ec78c7e41b Author: Steve Hay <steve...@planit.com> Date: Wed Oct 1 16:55:42 2008 +0000 Fix Archive-Tar's 02_methods.t when IO::Compress::Bzip2 is absent p4raw-id: //depot/p...@34453 (cherry picked from commit 5ccadbde5117ca2083980e2ed7e909f56552e473) M lib/Archive/Tar/t/02_methods.t commit 9abe975d921a062c9f3dd47c6055002b63cb5bae Author: Steve Hay <steve...@planit.com> Date: Wed Oct 1 16:53:57 2008 +0000 Upgrade to Archive-Tar-1.39_04 Local change 32352 remains in 04_resolved_issues.t p4raw-id: //depot/p...@34452 (cherry picked from commit 642eb38136f2ca16919538298be0521b16a2091e) M MANIFEST M lib/Archive/Tar.pm M lib/Archive/Tar/Constant.pm M lib/Archive/Tar/File.pm M lib/Archive/Tar/bin/ptar M lib/Archive/Tar/t/02_methods.t A lib/Archive/Tar/t/05_iter.t A lib/Archive/Tar/t/90_symlink.t A lib/Archive/Tar/t/src/linktest/linktest_missing_dir.tar.packed A lib/Archive/Tar/t/src/linktest/linktest_with_dir.tar.packed A lib/Archive/Tar/t/src/long/foo.tbz.packed A lib/Archive/Tar/t/src/short/foo.tbz.packed commit 70323ad7f3d8f46a651e2697daed4e5dc43f8174 Author: Aaron Crane <p...@aaroncrane.co.uk> Date: Mon Dec 15 11:13:17 2008 +0000 Typo in pod/perlfunc.pod Message-ID: <20081215111317.gi7...@aaroncrane.co.uk> p4raw-id: //depot/p...@35101 (cherry picked from commit ca8d723e9504508322389fed1274da4bbaed2dfb) M pod/perlfunc.pod commit 1ee085db49711c8f67281cba09d9169306fc344f Author: Nicholas Clark <n...@ccl4.org> Date: Sun Dec 14 09:26:00 2008 +0000 Fix change 35082 by manually expanding do_open() to Perl_do_openn(). p4raw-id: //depot/p...@35088 (cherry picked from commit 7f8ee4bea33e6e23ac4302695a9b26db779fd38a) M doio.c ----------------------------------------------------------------------- Summary of changes: MANIFEST | 7 + doio.c | 10 +- lib/Archive/Tar.pm | 496 ++++++++++++++------ lib/Archive/Tar/Constant.pm | 38 +- lib/Archive/Tar/File.pm | 91 +++- lib/Archive/Tar/bin/ptar | 105 +++-- lib/Archive/Tar/t/02_methods.t | 347 +++++++------- lib/Archive/Tar/t/04_resolved_issues.t | 9 +- lib/Archive/Tar/t/05_iter.t | 65 +++ lib/Archive/Tar/t/90_symlink.t | 62 +++ lib/Archive/Tar/t/99_pod.t | 24 + .../src/linktest/linktest_missing_dir.tar.packed} | 24 +- .../linktest_with_dir.tar.packed} | 47 +-- lib/Archive/Tar/t/src/long/bar.tar.packed | 2 +- lib/Archive/Tar/t/src/long/foo.tbz.packed | 23 + lib/Archive/Tar/t/src/long/foo.tgz.packed | 2 +- lib/Archive/Tar/t/src/short/bar.tar.packed | 2 +- lib/Archive/Tar/t/src/short/foo.tbz.packed | 19 + lib/Archive/Tar/t/src/short/foo.tgz.packed | 2 +- pod/perlfunc.pod | 4 +- 20 files changed, 928 insertions(+), 451 deletions(-) create mode 100644 lib/Archive/Tar/t/05_iter.t create mode 100644 lib/Archive/Tar/t/90_symlink.t create mode 100644 lib/Archive/Tar/t/99_pod.t copy lib/Archive/{Extract/t/src/x.tar.packed => Tar/t/src/linktest/linktest_missing_dir.tar.packed} (94%) copy lib/Archive/Tar/t/src/{short/bar.tar.packed => linktest/linktest_with_dir.tar.packed} (87%) create mode 100644 lib/Archive/Tar/t/src/long/foo.tbz.packed create mode 100644 lib/Archive/Tar/t/src/short/foo.tbz.packed diff --git a/MANIFEST b/MANIFEST index f73ddbd..e681d53 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1630,11 +1630,18 @@ lib/Archive/Tar/t/01_use.t Archive::Tar tests lib/Archive/Tar/t/02_methods.t Archive::Tar tests lib/Archive/Tar/t/03_file.t Archive::Tar tests lib/Archive/Tar/t/04_resolved_issues.t Archive::Tar tests +lib/Archive/Tar/t/05_iter.t Archive::Tar tests +lib/Archive/Tar/t/90_symlink.t Archive::Tar tests +lib/Archive/Tar/t/99_pod.t Archive::Tar tests +lib/Archive/Tar/t/src/linktest/linktest_missing_dir.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/linktest/linktest_with_dir.tar.packed Archive::Tar tests lib/Archive/Tar/t/src/long/b Archive::Tar tests lib/Archive/Tar/t/src/long/bar.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/long/foo.tbz.packed Archive::Tar tests lib/Archive/Tar/t/src/long/foo.tgz.packed Archive::Tar tests lib/Archive/Tar/t/src/short/b Archive::Tar tests lib/Archive/Tar/t/src/short/bar.tar.packed Archive::Tar tests +lib/Archive/Tar/t/src/short/foo.tbz.packed Archive::Tar tests lib/Archive/Tar/t/src/short/foo.tgz.packed Archive::Tar tests lib/assert.pl assertion and panic with stack trace lib/Attribute/Handlers/Changes Attribute::Handlers diff --git a/doio.c b/doio.c index f744f84..68bd495 100644 --- a/doio.c +++ b/doio.c @@ -841,14 +841,14 @@ Perl_nextargv(pTHX_ register GV *gv) sv_setpvn(sv,PL_oldname,oldlen); SETERRNO(0,0); /* in case sprintf set errno */ - if (!do_open(PL_argvoutgv,(char*)SvPVX_const(sv),SvCUR(sv),TRUE, + if (!Perl_do_openn(aTHX_ PL_argvoutgv, (char*)SvPVX_const(sv), + SvCUR(sv), TRUE, #ifdef VMS - O_WRONLY|O_CREAT|O_TRUNC,0, + O_WRONLY|O_CREAT|O_TRUNC,0, #else - O_WRONLY|O_CREAT|OPEN_EXCL,0600, + O_WRONLY|O_CREAT|OPEN_EXCL,0600, #endif - NULL)) - { + NULL, NULL, 0)) { if (ckWARN_d(WARN_INPLACE)) Perl_warner(aTHX_ packWARN(WARN_INPLACE), "Can't do inplace edit on %s: %s", PL_oldname, Strerror(errno) ); diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm index 508bcfe..53022e6 100644 --- a/lib/Archive/Tar.pm +++ b/lib/Archive/Tar.pm @@ -7,16 +7,31 @@ package Archive::Tar; require 5.005_03; +use Cwd; +use IO::Zlib; +use IO::File; +use Carp qw(carp croak); +use File::Spec (); +use File::Spec::Unix (); +use File::Path (); + +use Archive::Tar::File; +use Archive::Tar::Constant; + +require Exporter; + use strict; use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING - $INSECURE_EXTRACT_MODE + $INSECURE_EXTRACT_MODE @ISA @EXPORT ]; +...@isa = qw[Exporter]; +...@export = ( COMPRESS_GZIP, COMPRESS_BZIP ); $DEBUG = 0; $WARN = 1; $FOLLOW_SYMLINK = 0; -$VERSION = "1.38"; +$VERSION = "1.42"; $CHOWN = 1; $CHMOD = 1; $DO_NOT_USE_PREFIX = 0; @@ -28,24 +43,13 @@ BEGIN { ### try and load IO::String anyway, so you can dynamically ### switch between perlio and IO::String - eval { + $HAS_IO_STRING = eval { require IO::String; import IO::String; - }; - $HAS_IO_STRING = $@ ? 0 : 1; - + 1; + } || 0; } -use Cwd; -use IO::File; -use Carp qw(carp croak); -use File::Spec (); -use File::Spec::Unix (); -use File::Path (); - -use Archive::Tar::File; -use Archive::Tar::Constant; - =head1 NAME Archive::Tar - module for manipulations of tar archives @@ -55,7 +59,7 @@ Archive::Tar - module for manipulations of tar archives use Archive::Tar; my $tar = Archive::Tar->new; - $tar->read('origin.tgz',1); + $tar->read('origin.tgz'); $tar->extract(); $tar->add_files('file/foo.pl', 'docs/README'); @@ -63,7 +67,9 @@ Archive::Tar - module for manipulations of tar archives $tar->rename('oldname', 'new/file/name'); - $tar->write('files.tar'); + $tar->write('files.tar'); # plain tar + $tar->write('files.tgz', COMPRESSED_GZIP); # gzip compressed + $tar->write('files.tbz', COMPRESSED_BZIP); # bzip2 compressed =head1 DESCRIPTION @@ -122,23 +128,25 @@ sub new { return $obj; } -=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} ) +=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) Read the given tar file into memory. The first argument can either be the name of a file or a reference to an already open filehandle (or an IO::Zlib object if it's compressed) -The second argument indicates whether the file referenced by the first -argument is compressed. The C<read> will I<replace> any previous content in C<$tar>! -The second argument may be considered optional if IO::Zlib is -installed, since it will transparently Do The Right Thing. -Archive::Tar will warn if you try to pass a compressed file if -IO::Zlib is not available and simply return. +The second argument may be considered optional, but remains for +backwards compatibility. Archive::Tar now looks at the file +magic to determine what class should be used to open the file +and will transparently Do The Right Thing. + +Archive::Tar will warn if you try to pass a bzip2 compressed file and the +IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. Note that you can currently B<not> pass a C<gzip> compressed -filehandle, which is not opened with C<IO::Zlib>, nor a string +filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed +filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string containing the full archive information (either compressed or uncompressed). These are worth while features, but not currently implemented. See the C<TODO> section. @@ -153,12 +161,18 @@ all options are case-sensitive. Do not read more than C<limit> files. This is useful if you have very big archives, and are only interested in the first few files. +=item filter + +Can be set to a regular expression. Only files with names that match +the expression will be read. + =item extract If set to true, immediately extract entries when reading them. This gives you the same memory break as the C<extract_archive> function. Note however that entries will not be read into memory, but written -straight to disk. +straight to disk. This means no C<Archive::Tar::File> objects are +created for you to inspect. =back @@ -194,49 +208,97 @@ sub read { } sub _get_handle { - my $self = shift; - my $file = shift; return unless defined $file; - return $file if ref $file; - - my $gzip = shift || 0; - my $mode = shift || READ_ONLY->( ZLIB ); # default to read only - - my $fh; my $bin; - - ### only default to ZLIB if we're not trying to /write/ to a handle ### - if( ZLIB and $gzip || MODE_READ->( $mode ) ) { - - ### IO::Zlib will Do The Right Thing, even when passed - ### a plain file ### - $fh = new IO::Zlib; + my $self = shift; + my $file = shift; return unless defined $file; + return $file if ref $file; + my $compress = shift || 0; + my $mode = shift || READ_ONLY->( ZLIB ); # default to read only + + + ### get a FH opened to the right class, so we can use it transparently + ### throughout the program + my $fh; + { ### reading magic only makes sense if we're opening a file for + ### reading. otherwise, just use what the user requested. + my $magic = ''; + if( MODE_READ->($mode) ) { + open my $tmp, $file or do { + $self->_error( qq[Could not open '$file' for reading: $!] ); + return; + }; + + ### read the first 4 bites of the file to figure out which class to + ### use to open the file. + sysread( $tmp, $magic, 4 ); + close $tmp; + } - } else { - if( $gzip ) { - $self->_error(qq[Compression not available - Install IO::Zlib!]); - return; + ### is it bzip? + ### if you asked specifically for bzip compression, or if we're in + ### read mode and the magic numbers add up, use bzip + if( BZIP and ( + ($compress eq COMPRESS_BZIP) or + ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) + ) + ) { + + ### different reader/writer modules, different error vars... sigh + if( MODE_READ->($mode) ) { + $fh = IO::Uncompress::Bunzip2->new( $file ) or do { + $self->_error( qq[Could not read '$file': ] . + $IO::Uncompress::Bunzip2::Bunzip2Error + ); + return; + }; + + } else { + $fh = IO::Compress::Bzip2->new( $file ) or do { + $self->_error( qq[Could not write to '$file': ] . + $IO::Compress::Bzip2::Bzip2Error + ); + return; + }; + } + + ### is it gzip? + ### if you asked for compression, if you wanted to read or the gzip + ### magic number is present (redundant with read) + } elsif( ZLIB and ( + $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM + ) + ) { + $fh = IO::Zlib->new; + unless( $fh->open( $file, $mode ) ) { + $self->_error(qq[Could not create filehandle for '$file': $!]); + return; + } + + ### is it plain tar? } else { - $fh = new IO::File; - $bin++; - } - } + $fh = IO::File->new; - unless( $fh->open( $file, $mode ) ) { - $self->_error( qq[Could not create filehandle for '$file': $!!] ); - return; - } + unless( $fh->open( $file, $mode ) ) { + $self->_error(qq[Could not create filehandle for '$file': $!]); + return; + } - binmode $fh if $bin; + ### enable bin mode on tar archives + binmode $fh; + } + } return $fh; } + sub _read_tar { my $self = shift; my $handle = shift or return; my $opts = shift || {}; my $count = $opts->{limit} || 0; + my $filter = $opts->{filter}; my $extract = $opts->{extract} || 0; ### set a cap on the amount of files to extract ### @@ -372,6 +434,17 @@ sub _read_tar { undef $real_name; } + ### skip this entry if we're filtering + if ($filter && $entry->name !~ $filter) { + next LOOP; + + ### skip this entry if it's a pax header. This is a special file added + ### by, among others, git-generated tarballs. It holds comments and is + ### not meant for extracting. See #38932: pax_global_header extracted + } elsif ( $entry->name eq PAX_HEADER ) { + next LOOP; + } + $self->_extract_file( $entry ) if $extract && !$entry->is_longlink && !$entry->is_unknown @@ -544,7 +617,7 @@ sub _extract_file { my $dir; ### is $name an absolute path? ### - if( File::Spec->file_name_is_absolute( $dirs ) ) { + if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { ### absolute names are not allowed to be in tarballs under ### strict mode, so only allow it if a user tells us to do it @@ -557,34 +630,70 @@ sub _extract_file { } ### user asked us to, it's fine. - $dir = $dirs; + $dir = File::Spec->catpath( $vol, $dirs, "" ); ### it's a relative path ### } else { - my $cwd = (defined $self->{cwd} ? $self->{cwd} : cwd()); + my $cwd = (ref $self and defined $self->{cwd}) + ? $self->{cwd} + : cwd(); my @dirs = defined $alt ? File::Spec->splitdir( $dirs ) # It's a local-OS path : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely # straight from the tarball - ### paths that leave the current directory are not allowed under - ### strict mode, so only allow it if a user tells us to do this. if( not defined $alt and - not $INSECURE_EXTRACT_MODE and - grep { $_ eq '..' } @dirs - ) { - $self->_error( - q[Entry ']. $entry->full_path .q[' is attempting to leave the ]. - q[current working directory. Not extracting under SECURE ]. - q[EXTRACT MODE] - ); - return; - } + not $INSECURE_EXTRACT_MODE + ) { + + ### paths that leave the current directory are not allowed under + ### strict mode, so only allow it if a user tells us to do this. + if( grep { $_ eq '..' } @dirs ) { + + $self->_error( + q[Entry ']. $entry->full_path .q[' is attempting to leave ]. + q[the current working directory. Not extracting under ]. + q[SECURE EXTRACT MODE] + ); + return; + } - ### '.' is the directory delimiter, of which the first one has to - ### be escaped/changed. - map tr/\./_/, @dirs if ON_VMS; + ### the archive may be asking us to extract into a symlink. This + ### is not sane and a possible security issue, as outlined here: + ### https://rt.cpan.org/Ticket/Display.html?id=30380 + ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 + ### https://issues.rpath.com/browse/RPL-1716 + my $full_path = $cwd; + for my $d ( @dirs ) { + $full_path = File::Spec->catdir( $full_path, $d ); + + ### we've already checked this one, and it's safe. Move on. + next if ref $self and $self->{_link_cache}->{$full_path}; + + if( -l $full_path ) { + my $to = readlink $full_path; + my $diag = "symlinked directory ($full_path => $to)"; + + $self->_error( + q[Entry ']. $entry->full_path .q[' is attempting to ]. + qq[extract to a $diag. This is considered a security ]. + q[vulnerability and not allowed under SECURE EXTRACT ]. + q[MODE] + ); + return; + } + + ### XXX keep a cache if possible, so the stats become cheaper: + $self->{_link_cache}->{$full_path} = 1 if ref $self; + } + } + + ### '.' is the directory delimiter on VMS, which has to be escaped + ### or changed to '_' on vms. vmsify is used, because older versions + ### of vmspath do not handle this properly. + ### Must not add a '/' to an empty directory though. + map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; my ($cwd_vol,$cwd_dir,$cwd_file) = File::Spec->splitpath( $cwd ); @@ -606,7 +715,8 @@ sub _extract_file { $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' ); - ### catdir() returns undef if the path is longer than 255 chars on VMS + ### catdir() returns undef if the path is longer than 255 chars on + ### older VMS systems. unless ( defined $dir ) { $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); return; @@ -622,7 +732,8 @@ sub _extract_file { unless ( -d _ ) { eval { File::Path::mkpath( $dir, 0, 0777 ) }; if( $@ ) { - $self->_error( qq[Could not create directory '$dir': $...@] ); + my $fp = $entry->full_path; + $self->_error(qq[Could not create directory '$dir' for '$fp': $...@]); return; } @@ -672,10 +783,15 @@ sub _extract_file { $self->_make_special_file( $entry, $full ) or return; } - utime time, $entry->mtime - TIME_OFFSET, $full or - $self->_error( qq[Could not update timestamp] ); + ### only update the timestamp if it's not a symlink; that will change the + ### timestamp of the original. This addresses bug #33669: Could not update + ### timestamp warning on symlinks + if( not -l $full ) { + utime time, $entry->mtime - TIME_OFFSET, $full or + $self->_error( qq[Could not update timestamp] ); + } - if( $CHOWN && CAN_CHOWN ) { + if( $CHOWN && CAN_CHOWN->() ) { chown $entry->uid, $entry->gid, $full or $self->_error( qq[Could not set uid/gid on '$full'] ); } @@ -707,8 +823,8 @@ sub _make_special_file { or $fail++; } - $err = qq[Making symbolink link from '] . $entry->linkname . - qq[' to '$file' failed] if $fail; + $err = qq[Making symbolic link '$file' to '] . + $entry->linkname .q[' failed] if $fail; } elsif ( $entry->is_hardlink ) { my $fail; @@ -949,17 +1065,23 @@ sub clear { Write the in-memory archive to disk. The first argument can either be the name of a file or a reference to an already open filehandle (a -GLOB reference). If the second argument is true, the module will use -IO::Zlib to write the file in a compressed format. If IO::Zlib is -not available, the C<write> method will fail and return. +GLOB reference). + +The second argument is used to indicate compression. You can either +compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed +to be the C<gzip> compression level (between 1 and 9), but the use of +constants is prefered: + + # write a gzip compressed file + $tar->write( 'out.tgz', COMPRESSION_GZIP ); + + # write a bzip compressed file + $tar->write( 'out.tbz', COMPRESSION_BZIP ); Note that when you pass in a filehandle, the compression argument is ignored, as all files are printed verbatim to your filehandle. If you wish to enable compression with filehandles, use an -C<IO::Zlib> filehandle instead. - -Specific levels of compression can be chosen by passing the values 2 -through 9 as the second parameter. +C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. The third argument is an optional prefix. All files will be tucked away in the directory you specify as prefix. So if you have files @@ -970,6 +1092,7 @@ If no arguments are given, C<write> returns the entire formatted archive as a string, which could be useful if you'd like to stuff the archive into a socket or a pipe to gzip or something. + =cut sub write { @@ -1177,6 +1300,10 @@ I<Stuffit Expander> on MacOS. Be aware that the file's type/creator and resource fork will be lost, which is usually what you want in cross-platform archives. +Instead of a filename, you can also pass it an existing C<Archive::Tar::File> +object from, for example, another archive. The object will be clone, and +effectively be a copy of the original, not an alias. + Returns a list of C<Archive::Tar::File> objects that were just added. =cut @@ -1187,6 +1314,15 @@ sub add_files { my @rv; for my $file ( @files ) { + + ### you passed an Archive::Tar::File object + ### clone it so we don't accidentally have a reference to + ### an object from another archive + if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { + push @rv, $file->clone; + next; + } + unless( -e $file || -l $file ) { $self->_error( qq[No such file: '$file'] ); next; @@ -1350,56 +1486,29 @@ sub setcwd { $self->{cwd} = $cwd; } -=head2 $bool = $tar->has_io_string - -Returns true if we currently have C<IO::String> support loaded. - -Either C<IO::String> or C<perlio> support is needed to support writing -stringified archives. Currently, C<perlio> is the preferred method, if -available. - -See the C<GLOBAL VARIABLES> section to see how to change this preference. - -=cut - -sub has_io_string { return $HAS_IO_STRING; } - -=head2 $bool = $tar->has_perlio - -Returns true if we currently have C<perlio> support loaded. - -This requires C<perl-5.8> or higher, compiled with C<perlio> - -Either C<IO::String> or C<perlio> support is needed to support writing -stringified archives. Currently, C<perlio> is the preferred method, if -available. - -See the C<GLOBAL VARIABLES> section to see how to change this preference. - -=cut - -sub has_perlio { return $HAS_PERLIO; } - - =head1 Class Methods -=head2 Archive::Tar->create_archive($file, $compression, @filelist) +=head2 Archive::Tar->create_archive($file, $compressed, @filelist) Creates a tar file from the list of files provided. The first argument can either be the name of the tar file to create or a reference to an open file handle (e.g. a GLOB reference). -The second argument specifies the level of compression to be used, if -any. Compression of tar files requires the installation of the -IO::Zlib module. Specific levels of compression may be -requested by passing a value between 2 and 9 as the second argument. -Any other value evaluating as true will result in the default -compression level being used. +The second argument is used to indicate compression. You can either +compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed +to be the C<gzip> compression level (between 1 and 9), but the use of +constants is prefered: + + # write a gzip compressed file + Archive::Tar->create_archive( 'out.tgz', COMPRESSION_GZIP, @filelist ); + + # write a bzip compressed file + Archive::Tar->create_archive( 'out.tbz', COMPRESSION_BZIP, @filelist ); Note that when you pass in a filehandle, the compression argument is ignored, as all files are printed verbatim to your filehandle. If you wish to enable compression with filehandles, use an -C<IO::Zlib> filehandle instead. +C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. The remaining arguments list the files to be included in the tar file. These files must all exist. Any files which don't exist or can't be @@ -1431,7 +1540,63 @@ sub create_archive { return $tar->write( $file, $gzip ); } -=head2 Archive::Tar->list_archive ($file, $compressed, [...@properties]) +=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) + +Returns an iterator function that reads the tar file without loading +it all in memory. Each time the function is called it will return the +next file in the tarball. The files are returned as +C<Archive::Tar::File> objects. The iterator function returns the +empty list once it has exhausted the the files contained. + +The second argument can be a hash reference with options, which are +identical to the arguments passed to C<read()>. + +Example usage: + + my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); + + while( my $f = $next->() ) { + print $f->name, "\n"; + + $f->extract or warn "Extraction failed"; + + # .... + } + +=cut + + +sub iter { + my $class = shift; + my $filename = shift or return; + my $compressed = shift or 0; + my $opts = shift || {}; + + ### get a handle to read from. + my $handle = $class->_get_handle( + $filename, + $compressed, + READ_ONLY->( ZLIB ) + ) or return; + + my @data; + return sub { + return shift(@data) if @data; # more than one file returned? + return unless $handle; # handle exhausted? + + ### read data, should only return file + @data = @{ $class->_read_tar($handle, { %$opts, limit => 1 }) }; + + ### return one piece of data + return shift(@data) if @data; + + ### data is exhausted, free the filehandle + undef $handle; + return; + }; +} + +=head2 Archive::Tar->list_archive($file, $compressed, [...@properties]) Returns a list of the names of all the files in the archive. The first argument can either be the name of the tar file to list or a @@ -1462,7 +1627,7 @@ sub list_archive { return $tar->list_files( @_ ); } -=head2 Archive::Tar->extract_archive ($file, $gzip) +=head2 Archive::Tar->extract_archive($file, $compressed) Extracts the contents of the tar file. The first argument can either be the name of the tar file to create or a reference to an open file @@ -1486,11 +1651,57 @@ sub extract_archive { return $tar->read( $file, $gzip, { extract => 1 } ); } +=head2 $bool = Archive::Tar->has_io_string + +Returns true if we currently have C<IO::String> support loaded. + +Either C<IO::String> or C<perlio> support is needed to support writing +stringified archives. Currently, C<perlio> is the preferred method, if +available. + +See the C<GLOBAL VARIABLES> section to see how to change this preference. + +=cut + +sub has_io_string { return $HAS_IO_STRING; } + +=head2 $bool = Archive::Tar->has_perlio + +Returns true if we currently have C<perlio> support loaded. + +This requires C<perl-5.8> or higher, compiled with C<perlio> + +Either C<IO::String> or C<perlio> support is needed to support writing +stringified archives. Currently, C<perlio> is the preferred method, if +available. + +See the C<GLOBAL VARIABLES> section to see how to change this preference. + +=cut + +sub has_perlio { return $HAS_PERLIO; } + +=head2 $bool = Archive::Tar->has_zlib_support + +Returns true if C<Archive::Tar> can extract C<zlib> compressed archives + +=cut + +sub has_zlib_support { return ZLIB } + +=head2 $bool = Archive::Tar->has_bzip2_support + +Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives + +=cut + +sub has_bzip2_support { return BZIP } + =head2 Archive::Tar->can_handle_compressed_files A simple checking routine, which will return true if C<Archive::Tar> -is able to uncompress compressed archives on the fly with C<IO::Zlib>, -or false if C<IO::Zlib> is not installed. +is able to uncompress compressed archives on the fly with C<IO::Zlib> +and C<IO::Compress::Bzip2> or false if not both are installed. You can use this as a shortcut to determine whether C<Archive::Tar> will do what you think before passing compressed archives to its @@ -1498,7 +1709,7 @@ C<read> method. =cut -sub can_handle_compressed_files { return ZLIB ? 1 : 0 } +sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } sub no_string_support { croak("You have to install IO::String to support writing archives to strings"); @@ -1645,18 +1856,24 @@ Yes it is, see previous answer. Since C<Compress::Zlib> and therefore C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little choice but to read the archive into memory. This is ok if you want to do in-memory manipulation of the archive. + If you just want to extract, use the C<extract_archive> class method instead. It will optimize and write to disk immediately. -=item Can't you lazy-load data instead? +Another option is to use the C<iter> class method to iterate over +the files in the tarball without reading them all in memory at once. + +=item Can you lazy-load data instead? -No, not easily. See previous question. +In some cases, yes. You can use the C<iter> class method to iterate +over the files in the tarball without reading them all in memory at once. =item How much memory will an X kb tar file need? Probably more than X kb, since it will all be read into memory. If this is a problem, and you don't need to do in memory manipulation -of the archive, consider using C</bin/tar> instead. +of the archive, consider using the C<iter> class method, or C</bin/tar> +instead. =item What do you do with unsupported filetypes in an archive? @@ -1666,8 +1883,9 @@ try to make a copy of the original file, rather than throwing an error. This does require you to read the entire archive in to memory first, since otherwise we wouldn't know what data to fill the copy with. -(This means that you cannot use the class methods on archives that -have incompatible filetypes and still expect things to work). +(This means that you cannot use the class methods, including C<iter> +on archives that have incompatible filetypes and still expect things +to work). For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that the extraction of this particular item didn't work. @@ -1852,12 +2070,12 @@ Please reports bugs to E<lt>bug-archive-...@rt.cpan.orge<gt>. =head1 ACKNOWLEDGEMENTS -Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and -especially Andrew Savige for their help and suggestions. +Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas +and especially Andrew Savige for their help and suggestions. =head1 COPYRIGHT -This module is copyright (c) 2002 - 2007 Jos Boumans +This module is copyright (c) 2002 - 2008 Jos Boumans E<lt>k...@cpan.orge<gt>. All rights reserved. This library is free software; you may redistribute and/or modify diff --git a/lib/Archive/Tar/Constant.pm b/lib/Archive/Tar/Constant.pm index 00416d5..aef1d62 100644 --- a/lib/Archive/Tar/Constant.pm +++ b/lib/Archive/Tar/Constant.pm @@ -2,20 +2,16 @@ package Archive::Tar::Constant; BEGIN { require Exporter; - $VERSION= '0.02'; - @ISA = qw[Exporter]; - @EXPORT = qw[ - FILE HARDLINK SYMLINK CHARDEV BLOCKDEV DIR FIFO SOCKET UNKNOWN - BUFFER HEAD READ_ONLY WRITE_ONLY UNPACK PACK TIME_OFFSET ZLIB - BLOCK_SIZE TAR_PAD TAR_END ON_UNIX BLOCK CAN_READLINK MAGIC - TAR_VERSION UNAME GNAME CAN_CHOWN MODE CHECK_SUM UID GID - GZIP_MAGIC_NUM MODE_READ LONGLINK LONGLINK_NAME PREFIX_LENGTH - LABEL NAME_LENGTH STRIP_MODE ON_VMS - ]; + + $VERSION = '0.02'; + @ISA = qw[Exporter]; require Time::Local if $^O eq "MacOS"; } +use Package::Constants; +...@export = Package::Constants->list( __PACKAGE__ ); + use constant FILE => 0; use constant HARDLINK => 1; use constant SYMLINK => 2; @@ -32,6 +28,9 @@ use constant BUFFER => 4096; use constant HEAD => 512; use constant BLOCK => 512; +use constant COMPRESS_GZIP => 9; +use constant COMPRESS_BZIP => 'bzip2'; + use constant BLOCK_SIZE => sub { my $n = int($_[0]/BLOCK); $n++ if $_[0] % BLOCK; $n * BLOCK }; use constant TAR_PAD => sub { my $x = shift || return; return "\0" x (BLOCK - ($x % BLOCK) ) }; use constant TAR_END => "\0" x BLOCK; @@ -61,16 +60,25 @@ use constant TIME_OFFSET => ($^O eq "MacOS") ? Time::Local::timelocal(0,0,0,1 use constant MAGIC => "ustar"; use constant TAR_VERSION => "00"; use constant LONGLINK_NAME => '././@LongLink'; +use constant PAX_HEADER => 'pax_global_header'; - ### allow ZLIB to be turned off using ENV - ### DEBUG only + ### allow ZLIB to be turned off using ENV: DEBUG only use constant ZLIB => do { !$ENV{'PERL5_AT_NO_ZLIB'} and eval { require IO::Zlib }; - $ENV{'PERL5_AT_NO_ZLIB'} || $@ ? 0 : 1 }; - + $ENV{'PERL5_AT_NO_ZLIB'} || $@ ? 0 : 1 + }; + + ### allow BZIP to be turned off using ENV: DEBUG only +use constant BZIP => do { !$ENV{'PERL5_AT_NO_BZIP'} and + eval { require IO::Uncompress::Bunzip2; + require IO::Compress::Bzip2; }; + $ENV{'PERL5_AT_NO_BZIP'} || $@ ? 0 : 1 + }; + use constant GZIP_MAGIC_NUM => qr/^(?:\037\213|\037\235)/; +use constant BZIP_MAGIC_NUM => qr/^BZh\d/; -use constant CAN_CHOWN => do { ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32") }; +use constant CAN_CHOWN => sub { ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32") }; use constant CAN_READLINK => ($^O ne 'MSWin32' and $^O !~ /RISC(?:[ _])?OS/i and $^O ne 'VMS'); use constant ON_UNIX => ($^O ne 'MSWin32' and $^O ne 'MacOS' and $^O ne 'VMS'); use constant ON_VMS => $^O eq 'VMS'; diff --git a/lib/Archive/Tar/File.pm b/lib/Archive/Tar/File.pm index 8c96577..ead236f 100644 --- a/lib/Archive/Tar/File.pm +++ b/lib/Archive/Tar/File.pm @@ -1,15 +1,18 @@ package Archive::Tar::File; use strict; +use Carp (); use IO::File; use File::Spec::Unix (); use File::Spec (); use File::Basename (); +### avoid circular use, so only require; +require Archive::Tar; use Archive::Tar::Constant; use vars q...@isa $VERSION]; -...@isa = qw[Archive::Tar]; +...@isa = qw[Archive::Tar]; $VERSION = '0.02'; ### set value to 1 to oct() it during the unpack ### @@ -154,13 +157,13 @@ Raw tar header -- not useful for most users =head1 Methods -=head2 new( file => $path ) +=head2 Archive::Tar::File->new( file => $path ) Returns a new Archive::Tar::File object from an existing file. Returns undef on failure. -=head2 new( data => $path, $data, $opt ) +=head2 Archive::Tar::File->new( data => $path, $data, $opt ) Returns a new Archive::Tar::File object from data. @@ -171,7 +174,7 @@ tar header), which are described above in the Accessors section. Returns undef on failure. -=head2 new( chunk => $chunk ) +=head2 Archive::Tar::File->new( chunk => $chunk ) Returns a new Archive::Tar::File object from a raw 512-byte tar archive chunk. @@ -266,6 +269,29 @@ sub _new_from_file { my @items = qw[mode uid gid size mtime]; my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; + if (ON_VMS) { + ### VMS has two UID modes, traditional and POSIX. Normally POSIX is + ### not used. We currently do not have an easy way to see if we are in + ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. + ### The VMS UIC has the upper 16 bits is the GID, which in many cases + ### the VMS UIC will be larger than 209715, the largest that TAR can + ### handle. So for now, assume it is traditional if the UID is larger + ### than 0x10000. + + if ($hash{uid} > 0x10000) { + $hash{uid} = $hash{uid} & 0xFFFF; + } + + ### The file length from stat() is the physical length of the file + ### However the amount of data read in may be more for some file types. + ### Fixed length files are read past the logical EOF to end of the block + ### containing. Other file types get expanded on read because record + ### delimiters are added. + + my $data_len = length $data; + $hash{size} = $data_len if $hash{size} < $data_len; + + } ### you *must* set size == 0 on symlinks, or the next entry will be ### though of as the contents of the symlink, which is wrong. ### this fixes bug #7937 @@ -367,6 +393,9 @@ sub _prefix_and_file { ### if it's a directory, then $file might be empty $file = pop @dirs if $self->is_dir and not length $file; + ### splitting ../ gives you the relative path in native syntax + map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS; + my $prefix = File::Spec::Unix->catdir( grep { length } $vol, @dirs ); @@ -411,7 +440,25 @@ sub _downgrade_to_plainfile { return 1; } -=head2 full_path +=head2 $bool = $file->extract( [ $alternative_name ] ) + +Extract this object, optionally to an alternative name. + +See C<< Archive::Tar->extract_file >> for details. + +Returns true on success and false on failure. + +=cut + +sub extract { + my $self = shift; + + local $Carp::CarpLevel += 1; + + return Archive::Tar->_extract_file( $self, @_ ); +} + +=head2 $path = $file->full_path Returns the full path from the tar header; this is basically a concatenation of the C<prefix> and C<name> fields. **** PATCH TRUNCATED AT 1000 LINES -- 1396 NOT SHOWN **** -- Perl5 Master Repository