--- Begin Message ---
Package: dpkg
Version: 1.19.0.5
Severity: wishlist
Tags: patch
Dear Dpkg Developers,
Please add support for Zstandard compression to dpkg and other
programs generated by the dpkg source package [1].
Tests on packages repackaged with zstd -19 show only a small increase in
compressed package size compared to xz -6, while decompression time
decreased dramatically.
The recompressed firefox .deb (Ubuntu's
firefox_58.0.2+build1-0ubuntu0.17.10.1_amd64.deb) increased ~9% in
size but decompressed in <20% of the original time:
$ du -s firefox-*deb
43960 firefox-xz.deb
47924 firefox-zstd.deb
$ rm -rf firefox-xz/* ;time dpkg-deb -R firefox-xz.deb firefox-xz/
real 0m4,270s
user 0m4,220s
sys 0m0,630s
$ rm -rf firefox-zstd/* ;time dpkg-deb -R firefox-zstd.deb firefox-zstd/
real 0m0,765s
user 0m0,556s
sys 0m0,462s
Tests on the full Ubuntu main archive showed ~6% average increase in
the size of the binary packages.
The patches are also available on Salsa [2].
Cheers,
Balint
--
Balint Reczey
Ubuntu & Debian Developer
[1] http://facebook.github.io/zstd/
[2] https://salsa.debian.org/rbalint/dpkg/commits/zstd
From 79aad733cbc7edd44e124702f82b8a46a3a4aea9 Mon Sep 17 00:00:00 2001
From: Balint Reczey <[email protected]>
Date: Thu, 8 Mar 2018 09:53:36 +0100
Subject: [PATCH 1/4] dpkg: Add Zstandard compression support
---
README | 1 +
configure.ac | 2 +
debian/control | 3 ++
debian/rules | 1 +
dpkg-deb/Makefile.am | 1 +
dpkg-deb/extract.c | 1 +
dpkg-deb/main.c | 2 +-
lib/dpkg/compress.c | 127 +++++++++++++++++++++++++++++++++++++++++++-
lib/dpkg/compress.h | 1 +
m4/dpkg-libs.m4 | 7 +++
man/deb.man | 6 ++-
man/dpkg-deb.man | 2 +-
man/dpkg-source.man | 2 +-
scripts/Dpkg/Compression.pm | 6 +++
14 files changed, 156 insertions(+), 6 deletions(-)
diff --git a/README b/README
index 348f8e700..b0cf0a528 100644
--- a/README
+++ b/README
@@ -72,6 +72,7 @@ To enable optional functionality or programs, this software might be needed:
libmd (used by libdpkg, currently falling back to embedded code)
libz (from zlib, used instead of gzip command-line tool)
+ libzstd (from libzstd, used instead of zstd command-line tool)
liblzma (from xz utils, used instead of xz command-line tool)
libbz2 (from bzip2, used instead of bzip2 command-line tool)
libselinux
diff --git a/configure.ac b/configure.ac
index f6dff9f5e..2fbff6759 100644
--- a/configure.ac
+++ b/configure.ac
@@ -75,6 +75,7 @@ AC_SYS_LARGEFILE
# Checks for libraries.
DPKG_LIB_MD
DPKG_LIB_Z
+DPKG_LIB_ZSTD
DPKG_LIB_BZ2
DPKG_LIB_LZMA
DPKG_LIB_SELINUX
@@ -251,6 +252,7 @@ Configuration:
libselinux . . . . . . . . . : $have_libselinux
libmd . . . . . . . . . . . . : $have_libmd
libz . . . . . . . . . . . . : $have_libz
+ libzstd . . . . . . . . . . : $have_libzstd
liblzma . . . . . . . . . . . : $have_liblzma
libbz2 . . . . . . . . . . . : $have_libbz2
libcurses . . . . . . . . . . : ${have_libcurses:-no}
diff --git a/debian/control b/debian/control
index c73f79762..833f6c2b7 100644
--- a/debian/control
+++ b/debian/control
@@ -20,6 +20,7 @@ Build-Depends:
po4a (>= 0.43),
zlib1g-dev,
libbz2-dev,
+ libzstd-dev,
liblzma-dev,
libselinux1-dev [linux-any],
libncursesw5-dev,
@@ -67,6 +68,7 @@ Multi-Arch: same
Depends:
${misc:Depends},
zlib1g-dev,
+ libzstd-dev,
liblzma-dev,
libbz2-dev,
Description: Debian package management static library
@@ -132,6 +134,7 @@ Recommends:
# Used by Dpkg::Gettext.
liblocale-gettext-perl,
bzip2,
+ zstd,
xz-utils,
Suggests:
debian-keyring,
diff --git a/debian/rules b/debian/rules
index 27a2499ef..92f1d1c48 100755
--- a/debian/rules
+++ b/debian/rules
@@ -64,6 +64,7 @@ build-tree/config.status: configure
--with-devlibdir=\$${prefix}/lib/$(DEB_HOST_MULTIARCH) \
--without-libmd \
--with-libz \
+ --with-libzstd \
--with-liblzma \
--with-libbz2
diff --git a/dpkg-deb/Makefile.am b/dpkg-deb/Makefile.am
index 02d79ed7d..bbd30e02c 100644
--- a/dpkg-deb/Makefile.am
+++ b/dpkg-deb/Makefile.am
@@ -21,5 +21,6 @@ dpkg_deb_LDADD = \
../lib/dpkg/libdpkg.la \
$(LIBINTL) \
$(Z_LIBS) \
+ $(ZSTD_LIBS) \
$(LZMA_LIBS) \
$(BZ2_LIBS)
diff --git a/dpkg-deb/extract.c b/dpkg-deb/extract.c
index dba15dedb..7fd4b2b67 100644
--- a/dpkg-deb/extract.c
+++ b/dpkg-deb/extract.c
@@ -179,6 +179,7 @@ extracthalf(const char *debar, const char *dir,
decompressor = compressor_find_by_extension(extension);
if (decompressor != COMPRESSOR_TYPE_NONE &&
decompressor != COMPRESSOR_TYPE_GZIP &&
+ decompressor != COMPRESSOR_TYPE_ZSTD &&
decompressor != COMPRESSOR_TYPE_XZ)
ohshit(_("archive '%s' uses unknown compression for member '%.*s', "
"giving up"),
diff --git a/dpkg-deb/main.c b/dpkg-deb/main.c
index 52e9ce67d..7f898210e 100644
--- a/dpkg-deb/main.c
+++ b/dpkg-deb/main.c
@@ -108,7 +108,7 @@ usage(const struct cmdinfo *cip, const char *value)
" --[no-]uniform-compression Use the compression params on all members.\n"
" -z# Set the compression level when building.\n"
" -Z<type> Set the compression type used when building.\n"
-" Allowed types: gzip, xz, none.\n"
+" Allowed types: gzip, xz, zstd, none.\n"
" -S<strategy> Set the compression strategy when building.\n"
" Allowed values: none; extreme (xz);\n"
" filtered, huffman, rle, fixed (gzip).\n"
diff --git a/lib/dpkg/compress.c b/lib/dpkg/compress.c
index 44075cdb6..e20add3b7 100644
--- a/lib/dpkg/compress.c
+++ b/lib/dpkg/compress.c
@@ -32,6 +32,9 @@
#ifdef WITH_LIBZ
#include <zlib.h>
#endif
+#ifdef WITH_LIBZSTD
+#include <zstd.h>
+#endif
#ifdef WITH_LIBLZMA
#include <lzma.h>
#endif
@@ -47,7 +50,7 @@
#include <dpkg/buffer.h>
#include <dpkg/command.h>
#include <dpkg/compress.h>
-#if !defined(WITH_LIBZ) || !defined(WITH_LIBLZMA) || !defined(WITH_LIBBZ2)
+#if !defined(WITH_LIBZ) || !defined(WITH_LIBZSTD) || !defined(WITH_LIBLZMA) || !defined(WITH_LIBBZ2)
#include <dpkg/subproc.h>
static void DPKG_ATTR_SENTINEL
@@ -750,6 +753,127 @@ static const struct compressor compressor_lzma = {
.decompress = decompress_lzma,
};
+/*
+ * Zstd compressor.
+ */
+
+#define ZSTD "zstd"
+
+#ifdef WITH_LIBZSTD
+
+static void
+decompress_zstd(int fd_in, int fd_out, const char *desc)
+{
+ size_t const buf_in_size = ZSTD_DStreamInSize();
+ void* const buf_in = malloc(buf_in_size);
+ size_t const buf_out_size = ZSTD_DStreamOutSize();
+ void* const buf_out = malloc(buf_out_size);
+ size_t init_result, just_read, to_read;
+ ZSTD_DStream* const dstream = ZSTD_createDStream();
+ if (dstream == NULL) {
+ ohshit(_("ZSTD_createDStream() error "));
+ }
+
+ /* TODO: a file may consist of multiple appended frames (ex : pzstd).
+ * The following implementation decompresses only the first frame */
+ init_result = ZSTD_initDStream(dstream);
+ if (ZSTD_isError(init_result)) {
+ ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
+ }
+ to_read = init_result;
+ while ((just_read = fd_read(fd_in, buf_in, to_read))) {
+ ZSTD_inBuffer input = { buf_in, just_read, 0 };
+ while (input.pos < input.size) {
+ ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+ to_read = ZSTD_decompressStream(dstream, &output , &input);
+ if (ZSTD_isError(to_read)) {
+ ohshit(_("ZSTD_decompressStream() error : %s \n"),
+ ZSTD_getErrorName(to_read));
+ }
+ fd_write(fd_out, output.dst, output.pos);
+ }
+ }
+
+ ZSTD_freeDStream(dstream);
+ free(buf_in);
+ free(buf_out);
+
+}
+
+static void
+compress_zstd(int fd_in, int fd_out, struct compress_params *params, const char *desc)
+{
+ size_t const buf_in_size = ZSTD_CStreamInSize();
+ void* const buf_in = malloc(buf_in_size);
+ size_t const buf_out_size = ZSTD_CStreamOutSize();
+ void* const buf_out = malloc(buf_out_size);
+ size_t init_result, end_res;
+ size_t just_read, to_read;
+ ZSTD_CStream* const cstream = ZSTD_createCStream();
+ if (cstream == NULL) {
+ ohshit(_("ZSTD_createCStream() error "));
+ }
+
+ init_result = ZSTD_initCStream(cstream, params->level);
+ if (ZSTD_isError(init_result)) {
+ ohshit(_("ZSTD_initCStream() error : %s"), ZSTD_getErrorName(init_result));
+ }
+ to_read = buf_in_size;
+ while ((just_read = fd_read(fd_in, buf_in, to_read))) {
+ ZSTD_inBuffer input = { buf_in, just_read, 0 };
+ while (input.pos < input.size) {
+ ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+ to_read = ZSTD_compressStream(cstream, &output , &input);
+ if (ZSTD_isError(to_read)) {
+ ohshit(_("ZSTD_decompressStream() error : %s \n"), ZSTD_getErrorName(to_read));
+ }
+ fd_write(fd_out, output.dst, output.pos);
+ }
+ }
+ do {
+ ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+ end_res = ZSTD_endStream(cstream, &output);
+ if (ZSTD_isError(end_res)) {
+ ohshit(_("ZSTD_endstreamStream() error : %s \n"), ZSTD_getErrorName(end_res));
+ }
+ fd_write(fd_out, output.dst, output.pos);
+ } while (end_res > 0);
+
+ ZSTD_freeCStream(cstream);
+ free(buf_in);
+ free(buf_out);
+}
+
+#else
+static const char *env_zstd[] = {};
+
+static void
+decompress_zstd(int fd_in, int fd_out, const char *desc)
+{
+ fd_fd_filter(fd_in, fd_out, desc, env_zstd, ZSTD, "-dcq", NULL);
+}
+
+static void
+compress_zstd(int fd_in, int fd_out, struct compress_params *params, const char *desc)
+{
+ char combuf[6];
+
+ snprintf(combuf, sizeof(combuf), "-c%d", params->level);
+ fd_fd_filter(fd_in, fd_out, desc, env_zstd, ZSTD, combuf, "-q", NULL);
+}
+#endif
+
+static const struct compressor compressor_zstd = {
+ .name = "zstd",
+ .extension = ".zst",
+ /* The zstd command's default is 3 but the aim is to be closer to xz's
+ * default compression efficiency */
+ .default_level = 19,
+ .fixup_params = fixup_none_params,
+ .compress = compress_zstd,
+ .decompress = decompress_zstd,
+};
+
/*
* Generic compressor filter.
*/
@@ -760,6 +884,7 @@ static const struct compressor *compressor_array[] = {
[COMPRESSOR_TYPE_XZ] = &compressor_xz,
[COMPRESSOR_TYPE_BZIP2] = &compressor_bzip2,
[COMPRESSOR_TYPE_LZMA] = &compressor_lzma,
+ [COMPRESSOR_TYPE_ZSTD] = &compressor_zstd,
};
static const struct compressor *
diff --git a/lib/dpkg/compress.h b/lib/dpkg/compress.h
index 08aaf2516..1af8a3490 100644
--- a/lib/dpkg/compress.h
+++ b/lib/dpkg/compress.h
@@ -42,6 +42,7 @@ enum compressor_type {
COMPRESSOR_TYPE_XZ,
COMPRESSOR_TYPE_BZIP2,
COMPRESSOR_TYPE_LZMA,
+ COMPRESSOR_TYPE_ZSTD,
};
enum compressor_strategy {
diff --git a/m4/dpkg-libs.m4 b/m4/dpkg-libs.m4
index 577264706..8cbb3faa3 100644
--- a/m4/dpkg-libs.m4
+++ b/m4/dpkg-libs.m4
@@ -74,6 +74,13 @@ AC_DEFUN([DPKG_LIB_Z], [
DPKG_WITH_COMPRESS_LIB([z], [zlib.h], [gzdopen])
])# DPKG_LIB_Z
+# DPKG_LIB_ZSTD
+# -------------
+# Check for zstd library.
+AC_DEFUN([DPKG_LIB_ZSTD], [
+ DPKG_WITH_COMPRESS_LIB([zstd], [zstd.h], [ZSTD_decompressStream])
+])# DPKG_LIB_ZSTD
+
# DPKG_LIB_LZMA
# -------------
# Check for lzma library.
diff --git a/man/deb.man b/man/deb.man
index 2b8da5ff0..8d680dbdd 100644
--- a/man/deb.man
+++ b/man/deb.man
@@ -75,8 +75,9 @@ The second required member is named
.BR control.tar .
It is a tar archive containing the package control information, either
not compressed (supported since dpkg 1.17.6), or compressed with
-gzip (with \fB.gz\fP extension) or
-xz (with \fB.xz\fP extension, supported since 1.17.6),
+gzip (with \fB.gz\fP extension),
+xz (with \fB.xz\fP extension, supported since 1.17.6) or
+zstd (with \fB.zst\fP extension, supported since 1.19.1),
as a series of plain files, of which the file
.B control
is mandatory and contains the core control information, the
@@ -98,6 +99,7 @@ It contains the filesystem as a tar archive, either
not compressed (supported since dpkg 1.10.24), or compressed with
gzip (with \fB.gz\fP extension),
xz (with \fB.xz\fP extension, supported since dpkg 1.15.6),
+zstd (with \fB.zst\fP extension, supported since dpkg 1.19.1),
bzip2 (with \fB.bz2\fP extension, supported since dpkg 1.10.24) or
lzma (with \fB.lzma\fP extension, supported since dpkg 1.13.25).
.PP
diff --git a/man/dpkg-deb.man b/man/dpkg-deb.man
index f843f0a83..f94539f6a 100644
--- a/man/dpkg-deb.man
+++ b/man/dpkg-deb.man
@@ -230,7 +230,7 @@ The default for this field is “${Package}\\t${Version}\\n”.
.TP
.BI \-z compress-level
Specify which compression level to use on the compressor backend, when
-building a package (default is 9 for gzip, 6 for xz).
+building a package (default is 9 for gzip, 6 for xz and 19 for zstd).
The accepted values are 0-9 with: 0 being mapped to compressor none for gzip.
Before dpkg 1.16.2 level 0 was equivalent to compressor none for all
compressors.
diff --git a/man/dpkg-source.man b/man/dpkg-source.man
index 2233d7a8d..991162003 100644
--- a/man/dpkg-source.man
+++ b/man/dpkg-source.man
@@ -176,7 +176,7 @@ Specify the compression to use for created tarballs and diff files
(\fB\-\-compression\fP since dpkg 1.15.5).
Note that this option will not cause existing tarballs to be recompressed,
it only affects new files. Supported values are:
-.IR gzip ", " bzip2 ", " lzma " and " xz .
+.IR gzip ", " bzip2 ", " lzma ", " zstd " and " xz .
The default is \fIxz\fP for formats 2.0 and newer, and \fIgzip\fP for
format 1.0. \fIxz\fP is only supported since dpkg 1.15.5.
.TP
diff --git a/scripts/Dpkg/Compression.pm b/scripts/Dpkg/Compression.pm
index 3dbc4adf0..4ea512fdc 100644
--- a/scripts/Dpkg/Compression.pm
+++ b/scripts/Dpkg/Compression.pm
@@ -72,6 +72,12 @@ my $COMP = {
decomp_prog => [ 'unxz', '--format=lzma' ],
default_level => 6,
},
+ zstd => {
+ file_ext => 'zst',
+ comp_prog => [ 'zstd', '-q' ],
+ decomp_prog => [ 'unzstd', '-q' ],
+ default_level => 19,
+ },
xz => {
file_ext => 'xz',
comp_prog => [ 'xz' ],
--
2.15.1
From 9dec1a3f6be2e3d525a92f5a123300618407cb19 Mon Sep 17 00:00:00 2001
From: Balint Reczey <[email protected]>
Date: Thu, 8 Mar 2018 10:14:30 +0100
Subject: [PATCH 2/4] Add test for zstd decompression
---
debian/control | 1 +
t-func/deb-format.at | 13 +++++++++++++
2 files changed, 14 insertions(+)
diff --git a/debian/control b/debian/control
index 833f6c2b7..b9e3c9cf4 100644
--- a/debian/control
+++ b/debian/control
@@ -19,6 +19,7 @@ Build-Depends:
# Needed for --porefs.
po4a (>= 0.43),
zlib1g-dev,
+ zstd,
libbz2-dev,
libzstd-dev,
liblzma-dev,
diff --git a/t-func/deb-format.at b/t-func/deb-format.at
index cdfc648a8..d9ca8b2c5 100644
--- a/t-func/deb-format.at
+++ b/t-func/deb-format.at
@@ -28,6 +28,7 @@ xz -c control.tar >control.tar.xz
xz -c data.tar >data.tar.xz
bzip2 -c data.tar >data.tar.bz2
lzma -c data.tar >data.tar.lzma
+zstd -q -c data.tar >data.tar.zst
touch _ignore
touch unknown
])
@@ -290,6 +291,18 @@ drwxr-xr-x root/root 0 1970-01-01 00:00 ./
-rw-r--r-- root/root 5 1970-01-01 00:00 ./file-templ
])
+AT_CHECK([
+# Test data.tar.zst member
+ar rc pkg-data-zst.deb debian-binary control.tar.gz data.tar.zst
+ar t pkg-data-zst.deb
+dpkg-deb -c pkg-data-zst.deb
+], [], [debian-binary
+control.tar.gz
+data.tar.zst
+drwxr-xr-x root/root 0 1970-01-01 00:00 ./
+-rw-r--r-- root/root 5 1970-01-01 00:00 ./file-templ
+])
+
AT_CHECK([
# Test data.tar.lzma member
ar rc pkg-data-lzma.deb debian-binary control.tar.gz data.tar.lzma
--
2.15.1
From c927d94df0fdc59c25961505a5438b0dfc58710a Mon Sep 17 00:00:00 2001
From: Balint Reczey <[email protected]>
Date: Fri, 9 Mar 2018 15:19:43 +0100
Subject: [PATCH 3/4] dpkg: Support Zstandard compressed packages with multiple
frames
---
lib/dpkg/compress.c | 10 ++++++++--
t-func/deb-format.at | 2 +-
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/lib/dpkg/compress.c b/lib/dpkg/compress.c
index e20add3b7..2d804818f 100644
--- a/lib/dpkg/compress.c
+++ b/lib/dpkg/compress.c
@@ -774,8 +774,6 @@ decompress_zstd(int fd_in, int fd_out, const char *desc)
ohshit(_("ZSTD_createDStream() error "));
}
- /* TODO: a file may consist of multiple appended frames (ex : pzstd).
- * The following implementation decompresses only the first frame */
init_result = ZSTD_initDStream(dstream);
if (ZSTD_isError(init_result)) {
ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
@@ -791,6 +789,14 @@ decompress_zstd(int fd_in, int fd_out, const char *desc)
ZSTD_getErrorName(to_read));
}
fd_write(fd_out, output.dst, output.pos);
+ /* possible next frame */
+ if (to_read == 0) {
+ init_result = ZSTD_initDStream(dstream);
+ if (ZSTD_isError(init_result)) {
+ ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
+ }
+ to_read = init_result;
+ }
}
}
diff --git a/t-func/deb-format.at b/t-func/deb-format.at
index d9ca8b2c5..0296c1d04 100644
--- a/t-func/deb-format.at
+++ b/t-func/deb-format.at
@@ -28,7 +28,7 @@ xz -c control.tar >control.tar.xz
xz -c data.tar >data.tar.xz
bzip2 -c data.tar >data.tar.bz2
lzma -c data.tar >data.tar.lzma
-zstd -q -c data.tar >data.tar.zst
+pzstd -q -c data.tar >data.tar.zst
touch _ignore
touch unknown
])
--
2.15.1
From d4b3f22299339f4b54f0013b5f86eff48db1e8c4 Mon Sep 17 00:00:00 2001
From: Balint Reczey <[email protected]>
Date: Fri, 9 Mar 2018 11:19:24 +0100
Subject: [PATCH 4/4] dpkg: Enable zstd uniform compression
---
dpkg-deb/main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/dpkg-deb/main.c b/dpkg-deb/main.c
index 7f898210e..7a40ecb80 100644
--- a/dpkg-deb/main.c
+++ b/dpkg-deb/main.c
@@ -245,6 +245,7 @@ int main(int argc, const char *const *argv) {
if (opt_uniform_compression &&
(compress_params.type != COMPRESSOR_TYPE_NONE &&
compress_params.type != COMPRESSOR_TYPE_GZIP &&
+ compress_params.type != COMPRESSOR_TYPE_ZSTD &&
compress_params.type != COMPRESSOR_TYPE_XZ))
badusage(_("unsupported compression type '%s' with uniform compression"),
compressor_get_name(compress_params.type));
--
2.15.1
--- End Message ---