Re: [Bug-tar] added support for lz4 and zstd

2017-08-26 Thread Sven C. Dack

Thank you, Sergey.

I've included an updated patch for lz4+zstd support.

Sven


diff --git a/configure.ac b/configure.ac
index e89ed1d..680dde7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -249,6 +249,8 @@ TAR_COMPR_PROGRAM(bzip2)
 TAR_COMPR_PROGRAM(lzip)
 TAR_COMPR_PROGRAM(lzma)
 TAR_COMPR_PROGRAM(lzop)
+TAR_COMPR_PROGRAM(lz4)
+TAR_COMPR_PROGRAM(zstd)
 TAR_COMPR_PROGRAM(xz)
 
 AC_MSG_CHECKING(for default archive format)
diff --git a/doc/tar.1 b/doc/tar.1
index f5c1fca..3506169 100644
--- a/doc/tar.1
+++ b/doc/tar.1
@@ -13,7 +13,7 @@
 .\"
 .\" You should have received a copy of the GNU General Public License
 .\" along with this program.  If not, see <http://www.gnu.org/licenses/>.
-.TH TAR 1 "March 23, 2016" "TAR" "GNU TAR Manual"
+.TH TAR 1 "August 5, 2017" "TAR" "GNU TAR Manual"
 .SH NAME
 tar \- an archiving utility
 .SH SYNOPSIS
@@ -818,6 +818,14 @@ Filter the archive through
 Filter the archive through
 .BR lzop (1).
 .TP
+\fB\-\-lz4\fR
+Filter the archive through
+.BR lz4 (1).
+.TP
+\fB\-\-zstd\fR
+Filter the archive through
+.BR zstd (1).
+.TP
 \fB\-\-no\-auto\-compress\fR
 Do not use archive suffix to determine the compression program.
 .TP
@@ -1285,6 +1293,8 @@ failure during backup to a remote device.
 .BR gzip (1),
 .BR lzma (1),
 .BR lzop (1),
+.BR lz4 (1),
+.BR zstd (1),
 .BR rmt (8),
 .BR symlink (7),
 .BR tar (5),
diff --git a/doc/tar.texi b/doc/tar.texi
index edd190e..1ac1d4b 100644
--- a/doc/tar.texi
+++ b/doc/tar.texi
@@ -2971,6 +2971,16 @@ This option tells @command{tar} to read or write archives through
 This option tells @command{tar} to read or write archives through
 @command{lzop}.  @xref{gzip}.
 
+@item --lz4
+
+This option tells @command{tar} to read or write archives through
+@command{lz4}.  @xref{gzip}.
+
+@item --zstd
+
+This option tells @command{tar} to read or write archives through
+@command{zstd}.  @xref{gzip}.
+
 @opsummary{mode}
 @item --mode=@var{permissions}
 
@@ -9568,14 +9578,17 @@ switch to @samp{posix}.
 @cindex lzip
 @cindex lzma
 @cindex lzop
+@cindex lz4
+@cindex zstd
 @cindex compress
 @GNUTAR{} is able to create and read compressed archives.  It supports
 a wide variety of compression programs, namely: @command{gzip},
 @command{bzip2}, @command{lzip}, @command{lzma}, @command{lzop},
-@command{xz} and traditional @command{compress}. The latter is
-supported mostly for backward compatibility, and we recommend
-against using it, because it is by far less effective than the other
-compression programs@footnote{It also had patent problems in the past.}.
+@command{lz4}, @command{zstd}, @command{xz} and traditional
+@command{compress}.  The latter is supported mostly for backward
+compatibility, and we recommend against using it, because it is by far
+less effective than the other compression programs@footnote{It also
+had patent problems in the past.}.
 
 Creating a compressed archive is simple: you just specify a
 @dfn{compression option} along with the usual archive creation
@@ -9584,10 +9597,11 @@ create a @command{gzip} compressed archive, @option{-j}
 (@option{--bzip2}) to create a @command{bzip2} compressed archive,
 @option{--lzip} to create an @asis{lzip} compressed archive,
 @option{-J} (@option{--xz}) to create an @asis{XZ} archive,
-@option{--lzma} to create an @asis{LZMA} compressed
-archive, @option{--lzop} to create an @asis{LSOP} archive, and
-@option{-Z} (@option{--compress}) to use @command{compress} program.
-For example:
+@option{--lzma} to create an @asis{LZMA} compressed archive,
+@option{--lzop} to create an @asis{LZOP} archive, @option{--lz4} to
+create an @asis{LZ4} archive, @option{--zstd} to create a @asis{ZST}
+archive, and @option{-Z} (@option{--compress}) to use the
+@command{compress} program.  For example:
 
 @smallexample
 $ @kbd{tar czf archive.tar.gz .}
@@ -9707,6 +9721,14 @@ Filter the archive through @command{lzma}.
 @item --lzop
 Filter the archive through @command{lzop}.
 
+@opindex lz4
+@item --lz4
+Filter the archive through @command{lz4}.
+
+@opindex zstd
+@item --zstd
+Filter the archive through @command{zstd}.
+
 @opindex compress
 @opindex uncompress
 @item -Z
@@ -9778,6 +9800,8 @@ suffix.  The following suffixes are recognized:
 @item @samp{.lzma} @tab @command{lzma}
 @item @samp{.tlz} @tab @command{lzma}
 @item @samp{.lzo} @tab @command{lzop}
+@item @samp{.lz4} @tab @command{lz4}
+@item @samp{.zst} @tab @command{zstd}
 @item @samp{.xz} @tab @command{xz}
 @end multitable
 
diff --git a/src/buffer.c b/src/buffer.c
index 6f96c2f..d635499 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -281,6 +281,8 @@ enum compress_type {
   ct_lzip,
   ct_lzma,
   ct_lzop,
+  ct_lz4,
+  ct_zstd,
   ct_xz
 };
 
@@ -309,6 +311,8 @@ static struct zip_magic const magic[] = {
   { ct_lzip, 4, "LZIP" },
   { ct_lzma, 6, "\xFFLZMA" },
   { ct_lzop, 4, "\211LZO" },
+  { ct_lz4,  4, "\x04\x22\x4D\x18" },
+  { ct_zstd, 4, "\x28\xB5\x2F\xFD" },
   { ct_xz,   6, "\xFD" "7zXZ" },
 };
 
@@ -324,6 +328,8 @@ static struct 

Re: [Bug-tar] added support for lz4 and zstd

2017-08-24 Thread Eric Blake
On 08/22/2017 01:03 PM, Sven C. Dack wrote:
> Hello,
> 
> I'm sending you an updated patch against the current git master. Note
> that, in addition to supporting lz4 and zstd, it adds a single 'break;'
> to tar.c at line 1130:
> 
>  case LZOP_OPTION:
>    s = xasprintf (_("filter the archive through %s"), LZOP_PROGRAM);
> +  break;
> 
> The missing break previously caused the case to fall through and the
> --help option to print a wrong message regarding what gets filtered
> through which compression tool. Because it's a single missing break
> statement, I did not create an extra patch for it.

On the other hand, a separate commit for every bug is a GOOD policy to
abide by, as it makes life easier for downstream distros that want to
backport bug fixes but not features.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Bug-tar] added support for lz4 and zstd

2017-08-22 Thread Sven C. Dack

Hello,

I'm sending you an updated patch against the current git master. Note 
that, in addition to supporting lz4 and zstd, it adds a single 'break;' 
to tar.c at line 1130:


 case LZOP_OPTION:
   s = xasprintf (_("filter the archive through %s"), LZOP_PROGRAM);
+  break;

The missing break previously caused the case to fall through and the 
--help option to print a wrong message regarding what gets filtered 
through which compression tool. Because it's a single missing break 
statement, I did not create an extra patch for it.


Regards,

Sven




On 05/08/17 15:25, Sven C. Dack wrote:

Hello,

I've created a patch for tar, which adds support for lz4 and zstd 
compression. Both compression tools have become increasingly popular 
and offer high-speed compression and decompression, making them 
interesting alternatives to previous compression tools.


lz4 is similar to lzo, but offers faster decompression. zstd is 
similar to gzip, but offers much faster compression and decompression.


See also:

http://lz4.github.io/lz4/

http://facebook.github.io/zstd/

For example, creating an archive from a directory of 1.12 GB 
containing binary and ASCII files ("/usr/local/gnu/"):


Compression

name            time       size  ratio
gnu.tar.gz     57.2s  534077354  44.3%
gnu.tar.bz2  1:53.9s  509401491  42.3%
gnu.tar.xz   7:15.6s  423886520  35.2%
gnu.tar.lz   7:37.2s  425162846  35.3%
gnu.tar.lzo     4.7s  676569472  56.2%
gnu.tar.lz4     4.9s  676600069  56.2%
gnu.tar.zst    10.7s  517897416  43.0%

Decompression

name          time
gnu.tar.gz    7.1s
gnu.tar.bz2  45.2s
gnu.tar.xz   26.3s
gnu.tar.lz   30.4s
gnu.tar.lzo   3.5s
gnu.tar.lz4   2.0s
gnu.tar.zst   3.2s

A second example using the Linux kernel source code (4.12.4), 
containing 697 MB of data:


Compression

name              time       size  ratio
linux.tar.gz     28.2s  156988489  21.4%
linux.tar.bz2  1:05.5s  120659282  16.5%
linux.tar.xz   4:26.9s  102214892  13.9%
linux.tar.lz   4:18.4s  104105244  14.2%
linux.tar.lzo     3.6s  261745137  35.7%
linux.tar.lz4     3.9s  252707015  34.5%
linux.tar.zst     5.9s  150448272  20.5%

Decompression

name            time
linux.tar.gz    3.6s
linux.tar.bz2  19.4s
linux.tar.xz    7.2s
linux.tar.lz    8.1s
linux.tar.lzo   2.5s
linux.tar.lz4   1.5s
linux.tar.zst   2.1s

(The tools were used with their default arguments. The hardware used 
was an AMD FX8350 4GHz.)


Attached files are:

patch-part-1 - diffs of buffer.c config.h.in configure.ac suffix.c tar.c
patch-part-2 - diffs of tar.texi tar.1 cs.po de.po fi.po ga.po id.po 
ru.po sl.po zh_CN.po


Cheers



diff --git a/configure.ac b/configure.ac
index e89ed1d..680dde7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -249,6 +249,8 @@ TAR_COMPR_PROGRAM(bzip2)
 TAR_COMPR_PROGRAM(lzip)
 TAR_COMPR_PROGRAM(lzma)
 TAR_COMPR_PROGRAM(lzop)
+TAR_COMPR_PROGRAM(lz4)
+TAR_COMPR_PROGRAM(zstd)
 TAR_COMPR_PROGRAM(xz)
 
 AC_MSG_CHECKING(for default archive format)
diff --git a/doc/tar.1 b/doc/tar.1
index f5c1fca..3506169 100644
--- a/doc/tar.1
+++ b/doc/tar.1
@@ -13,7 +13,7 @@
 .\"
 .\" You should have received a copy of the GNU General Public License
 .\" along with this program.  If not, see <http://www.gnu.org/licenses/>.
-.TH TAR 1 "March 23, 2016" "TAR" "GNU TAR Manual"
+.TH TAR 1 "August 5, 2017" "TAR" "GNU TAR Manual"
 .SH NAME
 tar \- an archiving utility
 .SH SYNOPSIS
@@ -818,6 +818,14 @@ Filter the archive through
 Filter the archive through
 .BR lzop (1).
 .TP
+\fB\-\-lz4\fR
+Filter the archive through
+.BR lz4 (1).
+.TP
+\fB\-\-zstd\fR
+Filter the archive through
+.BR zstd (1).
+.TP
 \fB\-\-no\-auto\-compress\fR
 Do not use archive suffix to determine the compression program.
 .TP
@@ -1285,6 +1293,8 @@ failure during backup to a remote device.
 .BR gzip (1),
 .BR lzma (1),
 .BR lzop (1),
+.BR lz4 (1),
+.BR zstd (1),
 .BR rmt (8),
 .BR symlink (7),
 .BR tar (5),
diff --git a/doc/tar.texi b/doc/tar.texi
index edd190e..1ac1d4b 100644
--- a/doc/tar.texi
+++ b/doc/tar.texi
@@ -2971,6 +2971,16 @@ This option tells @command{tar} to read or write archives through
 This option tells @command{tar} to read or write archives through
 @command{lzop}.  @xref{gzip}.
 
+@item --lz4
+
+This option tells @command{tar} to read or write archives through
+@command{lz4}.  @xref{gzip}.
+
+@item --zstd
+
+This option tells @command{tar} to read or write archives through
+@command{zstd}.  @xref{gzip}.
+
 @opsummary{mode}
 @item --mode=@var{permissions}
 
@@ -9568,14 +9578,17 @@ switch to @samp{posix}.
 @cindex lzip
 @cindex lzma
 @cindex lzop
+@cindex lz4
+@cindex zstd
 @cindex compress
 @GNUTAR{} is able to create and read compressed archives.  It supports
 a wide variety of compression programs, namely: @command{gzip},
 @command{bzip2}, @command{lzip}, @command{lzma}, @command{lzop},
-@command{xz} and traditional @command{compress}. The latter is
-supported mostly for backward compatibility, and we recommend
-against using it, because it is by far less effective than the other

[Bug-tar] added support for lz4 and zstd

2017-08-05 Thread Sven C. Dack

Hello,

I've created a patch for tar, which adds support for lz4 and zstd 
compression. Both compression tools have become increasingly popular and 
offer high-speed compression and decompression, making them interesting 
alternatives to previous compression tools.


lz4 is similar to lzo, but offers faster decompression. zstd is similar 
to gzip, but offers much faster compression and decompression.


See also:

http://lz4.github.io/lz4/

http://facebook.github.io/zstd/

For example, creating an archive from a directory of 1.12 GB containing 
binary and ASCII files ("/usr/local/gnu/"):


Compression

name            time       size  ratio
gnu.tar.gz     57.2s  534077354  44.3%
gnu.tar.bz2  1:53.9s  509401491  42.3%
gnu.tar.xz   7:15.6s  423886520  35.2%
gnu.tar.lz   7:37.2s  425162846  35.3%
gnu.tar.lzo     4.7s  676569472  56.2%
gnu.tar.lz4     4.9s  676600069  56.2%
gnu.tar.zst    10.7s  517897416  43.0%

Decompression

name          time
gnu.tar.gz    7.1s
gnu.tar.bz2  45.2s
gnu.tar.xz   26.3s
gnu.tar.lz   30.4s
gnu.tar.lzo   3.5s
gnu.tar.lz4   2.0s
gnu.tar.zst   3.2s

A second example using the Linux kernel source code (4.12.4), containing 
697 MB of data:


Compression

name              time       size  ratio
linux.tar.gz     28.2s  156988489  21.4%
linux.tar.bz2  1:05.5s  120659282  16.5%
linux.tar.xz   4:26.9s  102214892  13.9%
linux.tar.lz   4:18.4s  104105244  14.2%
linux.tar.lzo     3.6s  261745137  35.7%
linux.tar.lz4     3.9s  252707015  34.5%
linux.tar.zst     5.9s  150448272  20.5%

Decompression

name            time
linux.tar.gz    3.6s
linux.tar.bz2  19.4s
linux.tar.xz    7.2s
linux.tar.lz    8.1s
linux.tar.lzo   2.5s
linux.tar.lz4   1.5s
linux.tar.zst   2.1s

(The tools were used with their default arguments. The hardware used was 
an AMD FX8350 4GHz.)


Attached files are:

patch-part-1 - diffs of buffer.c config.h.in configure.ac suffix.c tar.c
patch-part-2 - diffs of tar.texi tar.1 cs.po de.po fi.po ga.po id.po 
ru.po sl.po zh_CN.po


Cheers

--- tar-1.29/src/buffer.c   2016-03-14 20:58:16.000000000 +0000
+++ tar-1.29.1/src/buffer.c 2017-08-05 11:51:57.457112307 +0100
@@ -270,6 +270,8 @@
   ct_lzip,
   ct_lzma,
   ct_lzop,
+  ct_lz4,
+  ct_zstd,
   ct_xz
 };
 
@@ -298,6 +300,8 @@
   { ct_lzip, 4, "LZIP" },
   { ct_lzma, 6, "\xFFLZMA" },
   { ct_lzop, 4, "\211LZO" },
+  { ct_lz4,  4, "\x04\x22\x4D\x18" },
+  { ct_zstd, 4, "\x28\xB5\x2F\xFD" },
   { ct_xz,   6, "\xFD" "7zXZ" },
 };
 
@@ -313,6 +317,8 @@
   { ct_lzma, LZMA_PROGRAM, "--lzma" },
   { ct_lzma, XZ_PROGRAM,   "-J" },
   { ct_lzop, LZOP_PROGRAM, "--lzop" },
+  { ct_lz4,  LZ4_PROGRAM,  "--lz4" },
+  { ct_zstd, ZSTD_PROGRAM, "--zstd" },
   { ct_xz,   XZ_PROGRAM,   "-J" },
   { ct_none }
 };
--- tar-1.29/config.h.in    2016-05-16 09:52:47.000000000 +0100
+++ tar-1.29.1/config.h.in  2017-08-05 13:17:22.327294420 +0100
@@ -2190,6 +2190,9 @@
    slash. */
 #undef LSTAT_FOLLOWS_SLASHED_SYMLINK
 
+/* Define to the program name of lz4 compressor program */
+#undef LZ4_PROGRAM
+
 /* Define to the program name of lzip compressor program */
 #undef LZIP_PROGRAM
 
@@ -2454,6 +2457,9 @@
 /* Define to the program name of xz compressor program */
 #undef XZ_PROGRAM
 
+/* Define to the program name of zstd compressor program */
+#undef ZSTD_PROGRAM
+
 /* Enable large inode numbers on Mac OS X 10.5. */
 #undef _DARWIN_USE_64_BIT_INODE
 
--- tar-1.29/configure.ac   2016-05-16 09:51:12.000000000 +0100
+++ tar-1.29.1/configure.ac 2017-08-05 12:02:08.206629003 +0100
@@ -249,6 +249,8 @@
 TAR_COMPR_PROGRAM(lzip)
 TAR_COMPR_PROGRAM(lzma)
 TAR_COMPR_PROGRAM(lzop)
+TAR_COMPR_PROGRAM(lz4)
+TAR_COMPR_PROGRAM(zstd)
 TAR_COMPR_PROGRAM(xz)
 
 AC_MSG_CHECKING(for default archive format)
--- tar-1.29/src/suffix.c   2016-01-20 09:26:32.000000000 +0000
+++ tar-1.29.1/src/suffix.c 2017-08-05 12:57:33.987681075 +0100
@@ -43,6 +43,8 @@
   { S(lzma, LZMA) },
   { S(tlz,  LZMA) },
   { S(lzo,  LZOP) },
+  { S(lz4,  LZ4) },
+  { S(zst,  ZSTD) },
   { S(xz,   XZ) },
   { S(txz,  XZ) }, /* Slackware */
   { NULL }
--- tar-1.29/src/tar.c      2016-03-24 05:42:14.000000000 +0000
+++ tar-1.29.1/src/tar.c    2017-08-05 11:31:35.793518014 +0100
@@ -293,6 +293,8 @@
   LZIP_OPTION,
   LZMA_OPTION,
   LZOP_OPTION,
+  LZ4_OPTION,
+  ZSTD_OPTION,
   MODE_OPTION,
   MTIME_OPTION,
   NEWER_MTIME_OPTION,
@@ -681,6 +683,8 @@
   {"lzip", LZIP_OPTION, 0, 0, NULL, GRID+1 },
   {"lzma", LZMA_OPTION, 0, 0, NULL, GRID+1 },
   {"lzop", LZOP_OPTION, 0, 0, NULL, GRID+1 },
+  {"lz4", LZ4_OPTION, 0, 0, NULL, GRID+1 },
+  {"zstd", ZSTD_OPTION, 0, 0, NULL, GRID+1 },
   {"xz", 'J', 0, 0, NULL, GRID+1 },
 #undef GRID
 
@@ -1125,6 +1129,14 @@
 case LZOP_OPTION:
   s = xasprintf (_("filter the archive through %s"), LZOP_PROGRAM);
 
+case LZ4_OPTION:
+  s = xasprintf (_("filter the archive through %s"), LZ4_PROGRAM);
+  break;
+
+case ZSTD_OPTION:
+  s =