[PATCH v7 2/5] crypto: add zBeWalgo to crypto-api
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by zram. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig | 12 crypto/Makefile| 1 + crypto/testmgr.c | 10 +++ crypto/testmgr.h | 134 crypto/zbewalgo.c | 164 + drivers/block/zram/zcomp.c | 3 + 6 files changed, 324 insertions(+) create mode 100644 crypto/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 76e8c88c..749457a6 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1686,6 +1686,18 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "zBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the zBeWalgo compression algorithm. This algorithm + accepts only input sizes of at most one page at once. + To achieve high compression ratios zbewalgo can call multiple + transformation and compression algorithms in a row to optimize + the compressed size. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 4fc69fe9..493cbd65 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -124,6 +124,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/testmgr.c b/crypto/testmgr.c index af4a01c5..53fd43d1 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3611,6 +3611,16 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_xts_dec_tv_template) } } + }, { + .alg = "zbewalgo", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zbewalgo_comp_tv_template), + .decomp = __VECS(zbewalgo_decomp_tv_template) + } + } }, { .alg = "zlib-deflate", .test = alg_test_comp, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 004c0a0f..960bfbcf 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -37009,6 +37009,140 @@ static const struct hash_testvec bfin_crc_tv_template[] = { }; +static const struct comp_testvec zbewalgo_comp_tv_template[] = { + { + .inlen = 512, + .outlen = 402, + .input = + "\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d" + "\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d" + "\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d" + "\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d" + "\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d" + "\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d" + "\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d" + "\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d" + "\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d" + "\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d" + "\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d" + "\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d" + "\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d" + "\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d" + "\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d" + "\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d" + "\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d" + "\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d" + "\x76\x7f\x4b\xaf\x49\x9e\xab\x3d\xeb\x38\x53\xea\xc2\x9b\xab\x3d" +
[PATCH v7 3/5] crypto: add unsafe decompression to api
Up to Version 3 of this patch the decompressor of zbewalgo did not verify that there is no overflow in the output buffer. Now zbewalgo includes a safe decompressor which does check for buffer overflows and heap-error. ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version, which is a bit faster, but does no error checking. These unsafe decompressors can be applied when the datasource and the whole datapath is trusted. This patch publishes these existing functions in the crypto-api Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/842.c | 3 ++- crypto/compress.c| 10 ++ crypto/crypto_null.c | 3 ++- crypto/deflate.c | 3 ++- crypto/lz4.c | 23 ++- crypto/lz4hc.c | 23 ++- crypto/lzo.c | 3 ++- crypto/testmgr.c | 27 ++- crypto/zbewalgo.c| 29 - drivers/block/zram/zram_drv.c| 34 +- drivers/block/zram/zram_drv.h| 1 + drivers/crypto/cavium/zip/zip_main.c | 6 -- drivers/crypto/nx/nx-842-powernv.c | 3 ++- drivers/crypto/nx/nx-842-pseries.c | 3 ++- include/linux/crypto.h | 16 15 files changed, 174 insertions(+), 13 deletions(-) diff --git a/crypto/842.c b/crypto/842.c index bc26dc94..7e74ea26 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -112,7 +112,8 @@ static struct crypto_alg alg = { .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, - .coa_decompress = crypto842_decompress } } + .coa_decompress = crypto842_decompress, + .coa_decompress_unsafe = crypto842_decompress } } }; static struct scomp_alg scomp = { diff --git a/crypto/compress.c b/crypto/compress.c index f2d52292..bec79624 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm, dlen); } +static int crypto_decompress_unsafe(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, +u8 *dst, unsigned int *dlen) +{ + return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src, + slen, dst, + dlen); +} + int crypto_init_compress_ops(struct crypto_tfm *tfm) { struct compress_tfm *ops = >crt_compress; ops->cot_compress = crypto_compress; ops->cot_decompress = crypto_decompress; + ops->cot_decompress_unsafe = crypto_decompress_unsafe; return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c74..6e15e8c0 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { { .cra_module = THIS_MODULE, .cra_u = { .compress = { .coa_compress = null_compress, - .coa_decompress = null_compress } } + .coa_decompress = null_compress, + .coa_decompress_unsafe = null_compress } } } }; MODULE_ALIAS_CRYPTO("compress_null"); diff --git a/crypto/deflate.c b/crypto/deflate.c index 94ec3b36..4b681a37 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -286,7 +286,8 @@ static struct crypto_alg alg = { .cra_exit = deflate_exit, .cra_u = { .compress = { .coa_compress = deflate_compress, - .coa_decompress = deflate_decompress } } + .coa_decompress = deflate_decompress, + .coa_decompress_unsafe = deflate_decompress } } }; static struct scomp_alg scomp[] = { { diff --git a/crypto/lz4.c b/crypto/lz4.c index 2ce2660d..60a1914b 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, return 0; } +static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + int out_len = LZ4_decompress_fast(src, dst, *dlen); + + if (out_len < 0) + return -EINVAL; + + *dlen = out_len; + return 0; +} + static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) @@ -117,6 +130,13 @@ static int lz
[PATCH v7 4/5] crypto: configurable compression level
Most compression algorithms published by the crypto api are supporting multiple different compression levels. The crypto api currently just calls these algorithms with their default compression level. This patch enables the caller to specify the compression level. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/api.c | 76 +++ crypto/deflate.c | 16 + crypto/lz4.c | 16 + crypto/lz4hc.c| 13 +--- crypto/testmgr.c | 2 +- drivers/block/zram/zcomp.c| 10 +++--- drivers/block/zram/zcomp.h| 3 +- drivers/block/zram/zram_drv.c | 24 -- drivers/block/zram/zram_drv.h | 1 + fs/pstore/platform.c | 2 +- fs/ubifs/compress.c | 2 +- include/linux/crypto.h| 9 +++-- mm/zswap.c| 2 +- net/xfrm/xfrm_ipcomp.c| 3 +- 14 files changed, 147 insertions(+), 32 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 1d5290c6..81c3d416 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -388,6 +388,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); +struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg, + u32 type, u32 mask, int level) +{ + struct crypto_tfm *tfm = NULL; + unsigned int tfm_size; + int err = -ENOMEM; + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); + tfm = kzalloc(tfm_size, GFP_KERNEL); + if (!tfm) + goto out_err; + + tfm->__crt_alg = alg; + if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS) + tfm->crt_compress.cot_level = level; + + err = crypto_init_ops(tfm, type, mask); + if (err) + goto out_free_tfm; + + if (!tfm->exit && alg->cra_init) { + err = alg->cra_init(tfm); + if (err) + goto cra_init_failed; + } + + goto out; + +cra_init_failed: + crypto_exit_ops(tfm); +out_free_tfm: + if (err == -EAGAIN) + crypto_shoot_alg(alg); + kfree(tfm); +out_err: + tfm = ERR_PTR(err); +out: + return tfm; +} +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress); + /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -444,6 +485,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_base); +struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type, + u32 mask, int level) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + if (IS_ERR(alg)) { + err = PTR_ERR(alg); + goto err; + } + + tfm = __crypto_alloc_tfm_compress(alg, type, mask, level); + if (!IS_ERR(tfm)) + return tfm; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (fatal_signal_pending(current)) { + err = -EINTR; + break; + } + } + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_base_compress); + void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/deflate.c b/crypto/deflate.c index 4b681a37..54a2ff21 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -24,6 +24,7 @@ * it is not needed for IPCOMP and keeps the code simpler. It can be * implemented if someone wants it. */ + #include #include #include @@ -43,7 +44,7 @@ struct deflate_ctx { struct z_stream_s decomp_stream; }; -static int deflate_comp_init(struct deflate_ctx *ctx, int format) +static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level) { int ret = 0; struct z_stream_s *stream = >comp_stream; @@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int format) goto out; } if (format) - ret = zlib_deflateInit(stream, 3); + ret = zlib_deflateInit(stream, level); else - ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, + ret = zlib_deflateInit2(stream, level, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); @@ -109,11 +110,11 @@ static void
[PATCH v7 1/5] add compression algorithm zBeWalgo
zBeWalgo is a completely new algorithm - Currently it is not published somewhere else right now, googleing it would not show up any results. The following section describes how the algorithm works. zBeWalgo itself is a container compression algorithm, which can execute multiple different compression and transformation algorithms after each other. The execution of different compression algorithms after each other will be called 'combination' in this description and in the code. Additionally to be able to execute combinations of algorithms, zBeWalgo can try different combinations on the same input. This allows high compression ratios on completely different datasets, which would otherwise require its own algorithm each. Executing all known combinations on each input page would be very slow. Therefore the data is compressed at first with that combination, which was already successful on the last input page. If the compressed data size of the current page is similar to that of the last page, the compressed data is returned immediately without even trying the other combinations. Even if there is no guarantee that consecutive calls to the algorithm belong to each other, the speed improvement is obvious. ZRAM uses zsmalloc for the management of the compressed pages. The largest size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than that threshold, ZRAM ignores the compression and writes the uncompressed page instead. As a consequence it is useless to continue compression, if the algorithm detects, that the data can not be compressed using the current combination. The threshold for aborting compression can be changed via sysfs at any time, even if the algorithm is currently in use. If a combination fails to compress the data, zBeWalgo tries the next combination. If no combination is able to reduce the data in size, zBeWalgo returns a negative value. Each combination consists of up to 7 compression and transformation steps. Combinations can be added and removed at any time via sysfs. Already compressed Data can always be decompressed, even if the combination used to produce it does not exist anymore. Technically the user could add up to 256 combinations concurrently, but that would be very time consuming if the data can not be compressed. To be able to build combinations and call different algorithms, all those algorithms are implementing the same interface. This enables the user to specify additional combinations while ZRAM is running. Within the combinations many different algorithms can be used. Some of those algorithms are published. This patch adds the following algorithms to be used within the combinations: - bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and 'D. J. Wheeler' in 1994. This implementation uses counting sort for sorting the data. Their original paper is online available at: http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf - mtf: The Move-To-Front algorithm as described by 'M. Burrows' and 'D. J. Wheeler' in the same paper as bwt. - jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012. https://arxiv.org/pdf/1209.1045.pdf - jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive Bytes can increase the compression ratio, if for example the first 4 Bits of each Byte are zero. If jbe2 is called after mtf, this happens ofthen. - rle: Run Length Encoding - huffman: Huffman encoding - bewalgo: I invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- include/linux/zbewalgo.h | 50 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 120 lib/zbewalgo/BWT.h| 21 ++ lib/zbewalgo/JBE.c| 204 + lib/zbewalgo/JBE.h| 13 + lib/zbewalgo/JBE2.c | 221 ++ lib/zbewalgo/JBE2.h | 13 + lib/zbewalgo/MTF.c| 122 lib/zbewalgo/MTF.h| 13 + lib/zbewalgo/Makefile
[PATCH v7 0/5] add compression algorithm zBeWalgo
ewalgo'*2.37 337.07 463.32 zbewalgo' 2.37 337.07 468.96 zbewalgo* 2.60 101.17 578.35 zbewalgo 2.60 101.17 586.88 - 'Isabella TCf01' This dataset is an array of floating point values between -83.00402 and 31.51576. Detailed Information about this dataset is online available at http://www.vets.ucar.edu/vg/isabeldata/readme.html zBeWalgo is the only algorithm which can compress this dataset with a noticeable compressionratio. Algorithmratiowrite read 842 1.0060.09 1956.26 --hdd-- 1.00 134.70 156.62 lz4hc_01 1.00 154.81 1839.37 lz4hc*_01 1.00 154.81 2105.53 lz4hc_10 1.00 157.33 2078.69 lz4hc*_10 1.00 157.33 2113.14 lz4hc_09 1.00 158.50 2018.51 lz4hc*_09 1.00 158.50 2093.65 lz4hc*_02 1.00 159.54 2104.91 lz4hc_02 1.00 159.54 2117.34 lz4hc_03 1.00 161.26 2070.76 lz4hc*_03 1.00 161.26 2107.27 lz4hc*_08 1.00 161.34 2100.74 lz4hc_08 1.00 161.34 2105.26 lz4hc*_04 1.00 161.95 2080.96 lz4hc_04 1.00 161.95 2104.00 lz4hc_05 1.00 162.17 2044.43 lz4hc*_05 1.00 162.17 2101.74 lz4hc*_06 1.00 163.61 2087.19 lz4hc_06 1.00 163.61 2104.61 lz4hc_07 1.00 164.51 2094.78 lz4hc*_07 1.00 164.51 2105.53 lz4_011.00 1134.89 2109.70 lz4*_01 1.00 1134.89 2118.71 lz4*_08 1.00 1141.96 2104.87 lz4_081.00 1141.96 2118.97 lz4_091.00 1145.55 2087.76 lz4*_09 1.00 1145.55 2118.85 lz4_021.00 1157.28 2094.33 lz4*_02 1.00 1157.28 2124.67 lz4*_03 1.00 1194.18 2106.36 lz4_031.00 1194.18 2119.89 lz4_041.00 1195.09 2117.03 lz4*_04 1.00 1195.09 2120.23 lz4*_05 1.00 1225.56 2109.04 lz4_051.00 1225.56 2120.52 lz4*_06 1.00 1261.67 2109.14 lz4_061.00 1261.67 2121.13 lz4*_07 1.00 1270.86 1844.63 lz4_071.00 1270.86 2041.08 lz4_101.00 1305.36 2109.22 lz4*_10 1.00 1305.36 2120.65 lzo 1.00 1338.61 2109.66 zstd_17 1.0313.93 1138.94 zstd_18 1.0314.01 1170.78 zstd_16 1.0327.12 1073.75 zstd_15 1.0343.52 1061.97 zstd_14 1.0349.60 1082.98 zstd_12 1.0355.03 1042.43 zstd_13 1.0355.14 1173.50 zstd_11 1.0355.24 1178.05 zstd_10 1.0370.01 1173.05 zstd_07 1.03 118.10 1041.92 zstd_06 1.03 123.00 1171.59 zstd_05 1.03 124.61 1165.74 zstd_01 1.03 166.80 1005.29 zstd_04 1.03 170.25 1127.75 zstd_03 1.03 171.40 1172.34 zstd_02 1.03 174.08 1017.34 zstd_09 1.03 195.30 1176.82 zstd_08 1.03 195.98 1175.09 deflate_9 1.0530.15 483.55 deflate_8 1.0530.45 466.67 deflate_5 1.0531.25 480.92 deflate_4 1.0531.84 472.81 deflate_7 1.0531.84 484.18 deflate_6 1.0531.94 481.37 deflate_2 1.0533.07 484.09 deflate_3 1.0533.11 463.57 deflate_1 1.0533.19 469.71 zstd_22 1.06 8.89 647.75 zstd_21 1.0610.70 700.11 zstd_20 1.0610.80 723.42 zstd_19 1.0612.41 764.24 zbewalgo* 1.51 146.45 581.43 zbewalgo 1.51 146.45 592.86 zbewalgo'*1.5438.14 120.96 zbewalgo' 1.5438.14 125.81 Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> Benjamin Warnke (5): add compression algorithm zBeWalgo crypto: add zBeWalgo to crypto-api crypto: add unsafe decompression to api crypto: configurable compression level crypto: add flag for unstable encoding crypto/842.c | 3 +- crypto/Kconfig | 12 + crypto/Makefile | 1 + crypto/api.c | 76 crypto/compress.c| 10 + crypto/crypto_null.c | 3 +- crypto/deflate.c | 19 +- crypto/lz4.c | 39 +- crypto/lz4hc.c | 36 +- crypto/lzo.c | 3 +- crypto/testmgr.c | 39 +- crypto/testmgr.h | 134 +++ crypto/zbewalgo.c| 191 ++ drivers/block/zram/zcomp.c | 13 +- drivers/block/zram/zcomp.h | 3 +- drivers/block/zram/zram_drv.c| 56 ++- drivers/block/zram/zram_drv.h| 2 + drivers/crypto/cavium/zip/zip_main.c | 6 +- drivers/crypto/nx/nx-842-powernv.c | 3 +- drivers/crypto/nx/nx-842-pseries.c | 3 +- fs/ubifs/compress.c | 2 +- include/linux/crypto.h | 31 +- include/linux/zbewalgo.h | 50 +++ lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c | 120 ++ lib/zbewalgo/BWT.h | 21 ++ lib/zbewalgo/JBE.c
[PATCH v7 5/5] crypto: add flag for unstable encoding
The data-format of zBeWalgo, and some other algorithms is unstable. To identify such unstable algorithms this patch adds a new flag to the crypto-api. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/zbewalgo.c | 2 +- include/linux/crypto.h | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c index 9db0d43b..e57b5ced 100644 --- a/crypto/zbewalgo.c +++ b/crypto/zbewalgo.c @@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct crypto_tfm *tfm, static struct crypto_alg crypto_alg_zbewalgo = { .cra_name = "zbewalgo", - .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING, .cra_ctxsize = sizeof(struct zbewalgo_ctx), .cra_module = THIS_MODULE, .cra_init = zbewalgo_init, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6bfb1aea..2228dd08 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,12 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY0x4000 +/* + * Set if the algorithm is new and it is likely that the encoding may + * change in near future + */ +#define CRYPTO_ALG_UNSTABLE_ENCODING 0x8000 + /* * Transform masks and values (for crt_flags). */ -- 2.14.1
[PATCH v6 4/5] crypto: configurable compression level
Most compression algorithms published by the crypto api are supporting multiple different compression levels. The crypto api currently just calls these algorithms with their default compression level. This patch enables the caller to specify the compression level. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/api.c | 76 +++ crypto/deflate.c | 16 + crypto/lz4.c | 16 + crypto/lz4hc.c| 13 +--- crypto/testmgr.c | 2 +- drivers/block/zram/zcomp.c| 10 +++--- drivers/block/zram/zcomp.h| 3 +- drivers/block/zram/zram_drv.c | 24 -- drivers/block/zram/zram_drv.h | 1 + fs/ubifs/compress.c | 2 +- include/linux/crypto.h| 9 +++-- mm/zswap.c| 2 +- net/xfrm/xfrm_ipcomp.c| 3 +- 13 files changed, 146 insertions(+), 31 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 70a894e52..dadd4dede 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); +struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg, + u32 type, u32 mask, int level) +{ + struct crypto_tfm *tfm = NULL; + unsigned int tfm_size; + int err = -ENOMEM; + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); + tfm = kzalloc(tfm_size, GFP_KERNEL); + if (!tfm) + goto out_err; + + tfm->__crt_alg = alg; + if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS) + tfm->crt_compress.cot_level = level; + + err = crypto_init_ops(tfm, type, mask); + if (err) + goto out_free_tfm; + + if (!tfm->exit && alg->cra_init) { + err = alg->cra_init(tfm); + if (err) + goto cra_init_failed; + } + + goto out; + +cra_init_failed: + crypto_exit_ops(tfm); +out_free_tfm: + if (err == -EAGAIN) + crypto_shoot_alg(alg); + kfree(tfm); +out_err: + tfm = ERR_PTR(err); +out: + return tfm; +} +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress); + /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_base); +struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type, + u32 mask, int level) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + if (IS_ERR(alg)) { + err = PTR_ERR(alg); + goto err; + } + + tfm = __crypto_alloc_tfm_compress(alg, type, mask, level); + if (!IS_ERR(tfm)) + return tfm; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (fatal_signal_pending(current)) { + err = -EINTR; + break; + } + } + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_base_compress); + void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/deflate.c b/crypto/deflate.c index 4b681a37c..54a2ff21b 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -24,6 +24,7 @@ * it is not needed for IPCOMP and keeps the code simpler. It can be * implemented if someone wants it. */ + #include #include #include @@ -43,7 +44,7 @@ struct deflate_ctx { struct z_stream_s decomp_stream; }; -static int deflate_comp_init(struct deflate_ctx *ctx, int format) +static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level) { int ret = 0; struct z_stream_s *stream = >comp_stream; @@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int format) goto out; } if (format) - ret = zlib_deflateInit(stream, 3); + ret = zlib_deflateInit(stream, level); else - ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, + ret = zlib_deflateInit2(stream, level, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); @@ -109,11 +110,11 @@ static void deflate_decomp_exit(str
[PATCH v6 2/5] crypto: add zBeWalgo to crypto-api
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by zram. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig| 12 crypto/Makefile | 1 + crypto/testmgr.c | 10 +++ crypto/testmgr.h | 134 ++ crypto/zbewalgo.c | 164 ++ drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- 7 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 crypto/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b75264b09..3ac0d4ca7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1668,6 +1668,18 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "zBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the zBeWalgo compression algorithm. This algorithm + accepts only input sizes of at most one page at once. + To achieve high compression ratios zbewalgo can call multiple + transformation and compression algorithms in a row to optimize + the compressed size. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d5e23a142..294075476 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_xts_dec_tv_template) } } + }, { + .alg = "zbewalgo", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zbewalgo_comp_tv_template), + .decomp = __VECS(zbewalgo_decomp_tv_template) + } + } }, { .alg = "zlib-deflate", .test = alg_test_comp, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6044f6906..996d8321e 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -35133,6 +35133,140 @@ static const struct hash_testvec bfin_crc_tv_template[] = { }; +static const struct comp_testvec zbewalgo_comp_tv_template[] = { + { + .inlen = 512, + .outlen = 402, + .input = + "\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d" + "\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d" + "\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d" + "\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d" + "\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d" + "\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d" + "\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d" + "\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d" + "\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d" + "\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d" + "\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d" + "\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d" + "\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d" + "\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d" + "\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d" + "\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d" + "\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d" + "\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d" + "\x76\
[PATCH v6 3/5] crypto: add unsafe decompression to api
Up to Version 3 of this patch the decompressor of zbewalgo did not verify that there is no overflow in the output buffer. Now zbewalgo includes a safe decompressor which does check for buffer overflows and heap-error. ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version, which is a bit faster, but does no error checking. These unsafe decompressors can be applied when the datasource and the whole datapath is trusted. This patch publishes these existing functions in the crypto-api Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/842.c | 3 ++- crypto/compress.c| 10 ++ crypto/crypto_null.c | 3 ++- crypto/deflate.c | 3 ++- crypto/lz4.c | 23 ++- crypto/lz4hc.c | 23 ++- crypto/lzo.c | 3 ++- crypto/testmgr.c | 27 ++- crypto/zbewalgo.c| 29 - drivers/block/zram/zram_drv.c| 34 +- drivers/block/zram/zram_drv.h| 1 + drivers/crypto/cavium/zip/zip_main.c | 6 -- drivers/crypto/nx/nx-842-powernv.c | 3 ++- drivers/crypto/nx/nx-842-pseries.c | 3 ++- include/linux/crypto.h | 16 15 files changed, 174 insertions(+), 13 deletions(-) diff --git a/crypto/842.c b/crypto/842.c index bc26dc942..7e74ea26b 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -112,7 +112,8 @@ static struct crypto_alg alg = { .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, - .coa_decompress = crypto842_decompress } } + .coa_decompress = crypto842_decompress, + .coa_decompress_unsafe = crypto842_decompress } } }; static struct scomp_alg scomp = { diff --git a/crypto/compress.c b/crypto/compress.c index f2d522924..bec796249 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm, dlen); } +static int crypto_decompress_unsafe(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, +u8 *dst, unsigned int *dlen) +{ + return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src, + slen, dst, + dlen); +} + int crypto_init_compress_ops(struct crypto_tfm *tfm) { struct compress_tfm *ops = >crt_compress; ops->cot_compress = crypto_compress; ops->cot_decompress = crypto_decompress; + ops->cot_decompress_unsafe = crypto_decompress_unsafe; return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c746..6e15e8c0b 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { { .cra_module = THIS_MODULE, .cra_u = { .compress = { .coa_compress = null_compress, - .coa_decompress = null_compress } } + .coa_decompress = null_compress, + .coa_decompress_unsafe = null_compress } } } }; MODULE_ALIAS_CRYPTO("compress_null"); diff --git a/crypto/deflate.c b/crypto/deflate.c index 94ec3b36a..4b681a37c 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -286,7 +286,8 @@ static struct crypto_alg alg = { .cra_exit = deflate_exit, .cra_u = { .compress = { .coa_compress = deflate_compress, - .coa_decompress = deflate_decompress } } + .coa_decompress = deflate_decompress, + .coa_decompress_unsafe = deflate_decompress } } }; static struct scomp_alg scomp[] = { { diff --git a/crypto/lz4.c b/crypto/lz4.c index 2ce2660d3..60a1914b7 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, return 0; } +static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + int out_len = LZ4_decompress_fast(src, dst, *dlen); + + if (out_len < 0) + return -EINVAL; + + *dlen = out_len; + return 0; +} + static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) @@ -117,6 +130,13 @@ st
[PATCH v6 0/5] add compression algorithm zBeWalgo
ewalgo* 2.60 101.17 578.35 zbewalgo 2.60 101.17 586.88 - 'Isabella TCf01' This dataset is an array of floating point values between -83.00402 and 31.51576. Detailed Information about this dataset is online available at http://www.vets.ucar.edu/vg/isabeldata/readme.html zBeWalgo is the only algorithm which can compress this dataset with a noticeable compressionratio. Algorithmratiowrite read 842 1.0060.09 1956.26 --hdd-- 1.00 134.70 156.62 lz4hc_01 1.00 154.81 1839.37 lz4hc*_01 1.00 154.81 2105.53 lz4hc_10 1.00 157.33 2078.69 lz4hc*_10 1.00 157.33 2113.14 lz4hc_09 1.00 158.50 2018.51 lz4hc*_09 1.00 158.50 2093.65 lz4hc*_02 1.00 159.54 2104.91 lz4hc_02 1.00 159.54 2117.34 lz4hc_03 1.00 161.26 2070.76 lz4hc*_03 1.00 161.26 2107.27 lz4hc*_08 1.00 161.34 2100.74 lz4hc_08 1.00 161.34 2105.26 lz4hc*_04 1.00 161.95 2080.96 lz4hc_04 1.00 161.95 2104.00 lz4hc_05 1.00 162.17 2044.43 lz4hc*_05 1.00 162.17 2101.74 lz4hc*_06 1.00 163.61 2087.19 lz4hc_06 1.00 163.61 2104.61 lz4hc_07 1.00 164.51 2094.78 lz4hc*_07 1.00 164.51 2105.53 lz4_011.00 1134.89 2109.70 lz4*_01 1.00 1134.89 2118.71 lz4*_08 1.00 1141.96 2104.87 lz4_081.00 1141.96 2118.97 lz4_091.00 1145.55 2087.76 lz4*_09 1.00 1145.55 2118.85 lz4_021.00 1157.28 2094.33 lz4*_02 1.00 1157.28 2124.67 lz4*_03 1.00 1194.18 2106.36 lz4_031.00 1194.18 2119.89 lz4_041.00 1195.09 2117.03 lz4*_04 1.00 1195.09 2120.23 lz4*_05 1.00 1225.56 2109.04 lz4_051.00 1225.56 2120.52 lz4*_06 1.00 1261.67 2109.14 lz4_061.00 1261.67 2121.13 lz4*_07 1.00 1270.86 1844.63 lz4_071.00 1270.86 2041.08 lz4_101.00 1305.36 2109.22 lz4*_10 1.00 1305.36 2120.65 lzo 1.00 1338.61 2109.66 zstd_17 1.0313.93 1138.94 zstd_18 1.0314.01 1170.78 zstd_16 1.0327.12 1073.75 zstd_15 1.0343.52 1061.97 zstd_14 1.0349.60 1082.98 zstd_12 1.0355.03 1042.43 zstd_13 1.0355.14 1173.50 zstd_11 1.0355.24 1178.05 zstd_10 1.0370.01 1173.05 zstd_07 1.03 118.10 1041.92 zstd_06 1.03 123.00 1171.59 zstd_05 1.03 124.61 1165.74 zstd_01 1.03 166.80 1005.29 zstd_04 1.03 170.25 1127.75 zstd_03 1.03 171.40 1172.34 zstd_02 1.03 174.08 1017.34 zstd_09 1.03 195.30 1176.82 zstd_08 1.03 195.98 1175.09 deflate_9 1.0530.15 483.55 deflate_8 1.0530.45 466.67 deflate_5 1.0531.25 480.92 deflate_4 1.0531.84 472.81 deflate_7 1.0531.84 484.18 deflate_6 1.0531.94 481.37 deflate_2 1.0533.07 484.09 deflate_3 1.0533.11 463.57 deflate_1 1.0533.19 469.71 zstd_22 1.06 8.89 647.75 zstd_21 1.0610.70 700.11 zstd_20 1.0610.80 723.42 zstd_19 1.0612.41 764.24 zbewalgo* 1.51 146.45 581.43 zbewalgo 1.51 146.45 592.86 zbewalgo'*1.5438.14 120.96 zbewalgo' 1.5438.14 125.81 Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> Benjamin Warnke (5): add compression algorithm zBeWalgo crypto: add zBeWalgo to crypto-api crypto: add unsafe decompression to api crypto: configurable compression level crypto: add flag for unstable encoding crypto/842.c | 3 +- crypto/Kconfig | 12 + crypto/Makefile | 1 + crypto/api.c | 76 crypto/compress.c| 10 + crypto/crypto_null.c | 3 +- crypto/deflate.c | 19 +- crypto/lz4.c | 39 +- crypto/lz4hc.c | 36 +- crypto/lzo.c | 3 +- crypto/testmgr.c | 39 +- crypto/testmgr.h | 134 +++ crypto/zbewalgo.c| 191 ++ drivers/block/zram/zcomp.c | 13 +- drivers/block/zram/zcomp.h | 3 +- drivers/block/zram/zram_drv.c| 56 ++- drivers/block/zram/zram_drv.h| 6 +- drivers/crypto/cavium/zip/zip_main.c | 6 +- drivers/crypto/nx/nx-842-powernv.c | 3 +- drivers/crypto/nx/nx-842-pseries.c | 3 +- fs/ubifs/compress.c | 2 +- include/linux/crypto.h | 31 +- include/linux/zbewalgo.h | 50 +++ lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c | 120 ++ lib/zbewalgo/BWT.h | 21 ++ lib/zbewalgo/JBE.c | 204 ++ lib/zbewalgo/JBE.h | 13 + lib/zbe
[PATCH v6 1/5] add compression algorithm zBeWalgo
zBeWalgo is a completely new algorithm - Currently it is not published somewhere else right now, googleing it would not show up any results. The following section describes how the algorithm works. zBeWalgo itself is a container compression algorithm, which can execute multiple different compression and transformation algorithms after each other. The execution of different compression algorithms after each other will be called 'combination' in this description and in the code. Additionally to be able to execute combinations of algorithms, zBeWalgo can try different combinations on the same input. This allows high compression ratios on completely different datasets, which would otherwise require its own algorithm each. Executing all known combinations on each input page would be very slow. Therefore the data is compressed at first with that combination, which was already successful on the last input page. If the compressed data size of the current page is similar to that of the last page, the compressed data is returned immediately without even trying the other combinations. Even if there is no guarantee that consecutive calls to the algorithm belong to each other, the speed improvement is obvious. ZRAM uses zsmalloc for the management of the compressed pages. The largest size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than that threshold, ZRAM ignores the compression and writes the uncompressed page instead. As a consequence it is useless to continue compression, if the algorithm detects, that the data can not be compressed using the current combination. The threshold for aborting compression can be changed via sysfs at any time, even if the algorithm is currently in use. If a combination fails to compress the data, zBeWalgo tries the next combination. If no combination is able to reduce the data in size, zBeWalgo returns a negative value. Each combination consists of up to 7 compression and transformation steps. Combinations can be added and removed at any time via sysfs. Already compressed Data can always be decompressed, even if the combination used to produce it does not exist anymore. Technically the user could add up to 256 combinations concurrently, but that would be very time consuming if the data can not be compressed. To be able to build combinations and call different algorithms, all those algorithms are implementing the same interface. This enables the user to specify additional combinations while ZRAM is running. Within the combinations many different algorithms can be used. Some of those algorithms are published. This patch adds the following algorithms to be used within the combinations: - bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and 'D. J. Wheeler' in 1994. This implementation uses counting sort for sorting the data. Their original paper is online available at: http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf - mtf: The Move-To-Front algorithm as described by 'M. Burrows' and 'D. J. Wheeler' in the same paper as bwt. - jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012. https://arxiv.org/pdf/1209.1045.pdf - jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive Bytes can increase the compression ratio, if for example the first 4 Bits of each Byte are zero. If jbe2 is called after mtf, this happens ofthen. - rle: Run Length Encoding - huffman: Huffman encoding - bewalgo: I invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- include/linux/zbewalgo.h | 50 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 120 lib/zbewalgo/BWT.h| 21 ++ lib/zbewalgo/JBE.c| 204 + lib/zbewalgo/JBE.h| 13 + lib/zbewalgo/JBE2.c | 221 ++ lib/zbewalgo/JBE2.h | 13 + lib/zbewalgo/MTF.c| 122 lib/zbewalgo/MTF.h| 13 + lib/zbewalgo/Makefile
[PATCH v6 5/5] crypto: add flag for unstable encoding
The data-format of zBeWalgo, and some other algorithms is unstable. To identify such unstable algorithms this patch adds a new flag to the crypto-api. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/zbewalgo.c | 2 +- include/linux/crypto.h | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c index 9db0d43be..e57b5ced5 100644 --- a/crypto/zbewalgo.c +++ b/crypto/zbewalgo.c @@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct crypto_tfm *tfm, static struct crypto_alg crypto_alg_zbewalgo = { .cra_name = "zbewalgo", - .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING, .cra_ctxsize = sizeof(struct zbewalgo_ctx), .cra_module = THIS_MODULE, .cra_init = zbewalgo_init, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 63420dac0..372893569 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,12 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY0x4000 +/* + * Set if the algorithm is new and it is likely that the encoding may + * change in near future + */ +#define CRYPTO_ALG_UNSTABLE_ENCODING 0x8000 + /* * Transform masks and values (for crt_flags). */ -- 2.14.1
[RFC PATCH v5 2/5] crypto: add zBeWalgo to crypto-api
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by zram. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig| 12 crypto/Makefile | 1 + crypto/testmgr.c | 10 +++ crypto/testmgr.h | 134 ++ crypto/zbewalgo.c | 164 ++ drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- 7 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 crypto/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b75264b09..3ac0d4ca7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1668,6 +1668,18 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "zBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the zBeWalgo compression algorithm. This algorithm + accepts only input sizes of at most one page at once. + To achieve high compression ratios zbewalgo can call multiple + transformation and compression algorithms in a row to optimize + the compressed size. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d5e23a142..294075476 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_xts_dec_tv_template) } } + }, { + .alg = "zbewalgo", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zbewalgo_comp_tv_template), + .decomp = __VECS(zbewalgo_decomp_tv_template) + } + } }, { .alg = "zlib-deflate", .test = alg_test_comp, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6044f6906..996d8321e 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -35133,6 +35133,140 @@ static const struct hash_testvec bfin_crc_tv_template[] = { }; +static const struct comp_testvec zbewalgo_comp_tv_template[] = { + { + .inlen = 512, + .outlen = 402, + .input = + "\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d" + "\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d" + "\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d" + "\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d" + "\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d" + "\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d" + "\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d" + "\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d" + "\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d" + "\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d" + "\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d" + "\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d" + "\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d" + "\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d" + "\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d" + "\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d" + "\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d" + "\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d" + "\x76\
[RFC PATCH v5 0/5] add compression algorithm zBeWalgo
TCf01' This dataset is an array of floating point values between -83.00402 and 31.51576. Detailed Information about this dataset is online available at http://www.vets.ucar.edu/vg/isabeldata/readme.html zBeWalgo is the only algorithm which can compress this dataset with a noticeable compressionratio. Algorithmratiowrite read 842 1.0060.09 1956.26 --hdd-- 1.00 134.70 156.62 lz4hc_01 1.00 154.81 1839.37 lz4hc*_01 1.00 154.81 2105.53 lz4hc_10 1.00 157.33 2078.69 lz4hc*_10 1.00 157.33 2113.14 lz4hc_09 1.00 158.50 2018.51 lz4hc*_09 1.00 158.50 2093.65 lz4hc*_02 1.00 159.54 2104.91 lz4hc_02 1.00 159.54 2117.34 lz4hc_03 1.00 161.26 2070.76 lz4hc*_03 1.00 161.26 2107.27 lz4hc*_08 1.00 161.34 2100.74 lz4hc_08 1.00 161.34 2105.26 lz4hc*_04 1.00 161.95 2080.96 lz4hc_04 1.00 161.95 2104.00 lz4hc_05 1.00 162.17 2044.43 lz4hc*_05 1.00 162.17 2101.74 lz4hc*_06 1.00 163.61 2087.19 lz4hc_06 1.00 163.61 2104.61 lz4hc_07 1.00 164.51 2094.78 lz4hc*_07 1.00 164.51 2105.53 lz4_011.00 1134.89 2109.70 lz4*_01 1.00 1134.89 2118.71 lz4*_08 1.00 1141.96 2104.87 lz4_081.00 1141.96 2118.97 lz4_091.00 1145.55 2087.76 lz4*_09 1.00 1145.55 2118.85 lz4_021.00 1157.28 2094.33 lz4*_02 1.00 1157.28 2124.67 lz4*_03 1.00 1194.18 2106.36 lz4_031.00 1194.18 2119.89 lz4_041.00 1195.09 2117.03 lz4*_04 1.00 1195.09 2120.23 lz4*_05 1.00 1225.56 2109.04 lz4_051.00 1225.56 2120.52 lz4*_06 1.00 1261.67 2109.14 lz4_061.00 1261.67 2121.13 lz4*_07 1.00 1270.86 1844.63 lz4_071.00 1270.86 2041.08 lz4_101.00 1305.36 2109.22 lz4*_10 1.00 1305.36 2120.65 lzo 1.00 1338.61 2109.66 zstd_17 1.0313.93 1138.94 zstd_18 1.0314.01 1170.78 zstd_16 1.0327.12 1073.75 zstd_15 1.0343.52 1061.97 zstd_14 1.0349.60 1082.98 zstd_12 1.0355.03 1042.43 zstd_13 1.0355.14 1173.50 zstd_11 1.0355.24 1178.05 zstd_10 1.0370.01 1173.05 zstd_07 1.03 118.10 1041.92 zstd_06 1.03 123.00 1171.59 zstd_05 1.03 124.61 1165.74 zstd_01 1.03 166.80 1005.29 zstd_04 1.03 170.25 1127.75 zstd_03 1.03 171.40 1172.34 zstd_02 1.03 174.08 1017.34 zstd_09 1.03 195.30 1176.82 zstd_08 1.03 195.98 1175.09 deflate_9 1.0530.15 483.55 deflate_8 1.0530.45 466.67 deflate_5 1.0531.25 480.92 deflate_4 1.0531.84 472.81 deflate_7 1.0531.84 484.18 deflate_6 1.0531.94 481.37 deflate_2 1.0533.07 484.09 deflate_3 1.0533.11 463.57 deflate_1 1.0533.19 469.71 zstd_22 1.06 8.89 647.75 zstd_21 1.0610.70 700.11 zstd_20 1.0610.80 723.42 zstd_19 1.0612.41 764.24 zbewalgo* 1.51 146.45 581.43 zbewalgo 1.51 146.45 592.86 zbewalgo'*1.5438.14 120.96 zbewalgo' 1.5438.14 125.81 Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- -- 2.14.1
[RFC PATCH v5 4/5] crypto: configurable compression level
Most compression algorithms published by the crypto api are supporting multiple different compression levels. The crypto api currently just calls these algorithms with their default compression level. This patch enables the caller to specify the compression level. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/api.c | 76 +++ crypto/deflate.c | 16 + crypto/lz4.c | 16 + crypto/lz4hc.c| 13 +--- crypto/testmgr.c | 2 +- drivers/block/zram/zcomp.c| 10 +++--- drivers/block/zram/zcomp.h| 3 +- drivers/block/zram/zram_drv.c | 24 -- drivers/block/zram/zram_drv.h | 1 + fs/ubifs/compress.c | 2 +- include/linux/crypto.h| 9 +++-- mm/zswap.c| 2 +- net/xfrm/xfrm_ipcomp.c| 3 +- 13 files changed, 146 insertions(+), 31 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 70a894e52..dadd4dede 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); +struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg, + u32 type, u32 mask, int level) +{ + struct crypto_tfm *tfm = NULL; + unsigned int tfm_size; + int err = -ENOMEM; + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); + tfm = kzalloc(tfm_size, GFP_KERNEL); + if (!tfm) + goto out_err; + + tfm->__crt_alg = alg; + if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS) + tfm->crt_compress.cot_level = level; + + err = crypto_init_ops(tfm, type, mask); + if (err) + goto out_free_tfm; + + if (!tfm->exit && alg->cra_init) { + err = alg->cra_init(tfm); + if (err) + goto cra_init_failed; + } + + goto out; + +cra_init_failed: + crypto_exit_ops(tfm); +out_free_tfm: + if (err == -EAGAIN) + crypto_shoot_alg(alg); + kfree(tfm); +out_err: + tfm = ERR_PTR(err); +out: + return tfm; +} +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress); + /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_base); +struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type, + u32 mask, int level) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + if (IS_ERR(alg)) { + err = PTR_ERR(alg); + goto err; + } + + tfm = __crypto_alloc_tfm_compress(alg, type, mask, level); + if (!IS_ERR(tfm)) + return tfm; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (fatal_signal_pending(current)) { + err = -EINTR; + break; + } + } + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_base_compress); + void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/deflate.c b/crypto/deflate.c index 4b681a37c..54a2ff21b 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -24,6 +24,7 @@ * it is not needed for IPCOMP and keeps the code simpler. It can be * implemented if someone wants it. */ + #include #include #include @@ -43,7 +44,7 @@ struct deflate_ctx { struct z_stream_s decomp_stream; }; -static int deflate_comp_init(struct deflate_ctx *ctx, int format) +static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level) { int ret = 0; struct z_stream_s *stream = >comp_stream; @@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int format) goto out; } if (format) - ret = zlib_deflateInit(stream, 3); + ret = zlib_deflateInit(stream, level); else - ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, + ret = zlib_deflateInit2(stream, level, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); @@ -109,11 +110,11 @@ static void deflate_decomp_exit(str
[RFC PATCH v5 3/5] crypto: add unsafe decompression to api
Up to Version 3 of this patch the decompressor of zbewalgo did not verify that there is no overflow in the output buffer. Now zbewalgo includes a safe decompressor which does check for buffer overflows and heap-error. ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version, which is a bit faster, but does no error checking. These unsafe decompressors can be applied when the datasource and the whole datapath is trusted. This patch publishes these existing functions in the crypto-api Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/842.c | 3 ++- crypto/compress.c| 10 ++ crypto/crypto_null.c | 3 ++- crypto/deflate.c | 3 ++- crypto/lz4.c | 23 ++- crypto/lz4hc.c | 23 ++- crypto/lzo.c | 3 ++- crypto/testmgr.c | 27 ++- crypto/zbewalgo.c| 29 - drivers/block/zram/zram_drv.c| 34 +- drivers/block/zram/zram_drv.h| 1 + drivers/crypto/cavium/zip/zip_main.c | 6 -- drivers/crypto/nx/nx-842-powernv.c | 3 ++- drivers/crypto/nx/nx-842-pseries.c | 3 ++- include/linux/crypto.h | 16 15 files changed, 174 insertions(+), 13 deletions(-) diff --git a/crypto/842.c b/crypto/842.c index bc26dc942..7e74ea26b 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -112,7 +112,8 @@ static struct crypto_alg alg = { .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, - .coa_decompress = crypto842_decompress } } + .coa_decompress = crypto842_decompress, + .coa_decompress_unsafe = crypto842_decompress } } }; static struct scomp_alg scomp = { diff --git a/crypto/compress.c b/crypto/compress.c index f2d522924..bec796249 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm, dlen); } +static int crypto_decompress_unsafe(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, +u8 *dst, unsigned int *dlen) +{ + return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src, + slen, dst, + dlen); +} + int crypto_init_compress_ops(struct crypto_tfm *tfm) { struct compress_tfm *ops = >crt_compress; ops->cot_compress = crypto_compress; ops->cot_decompress = crypto_decompress; + ops->cot_decompress_unsafe = crypto_decompress_unsafe; return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c746..6e15e8c0b 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { { .cra_module = THIS_MODULE, .cra_u = { .compress = { .coa_compress = null_compress, - .coa_decompress = null_compress } } + .coa_decompress = null_compress, + .coa_decompress_unsafe = null_compress } } } }; MODULE_ALIAS_CRYPTO("compress_null"); diff --git a/crypto/deflate.c b/crypto/deflate.c index 94ec3b36a..4b681a37c 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -286,7 +286,8 @@ static struct crypto_alg alg = { .cra_exit = deflate_exit, .cra_u = { .compress = { .coa_compress = deflate_compress, - .coa_decompress = deflate_decompress } } + .coa_decompress = deflate_decompress, + .coa_decompress_unsafe = deflate_decompress } } }; static struct scomp_alg scomp[] = { { diff --git a/crypto/lz4.c b/crypto/lz4.c index 2ce2660d3..60a1914b7 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, return 0; } +static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + int out_len = LZ4_decompress_fast(src, dst, *dlen); + + if (out_len < 0) + return -EINVAL; + + *dlen = out_len; + return 0; +} + static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) @@ -117,6 +130,13 @@ st
[RFC PATCH v5 1/5] add compression algorithm zBeWalgo
zBeWalgo is a completely new algorithm - Currently it is not published somewhere else right now, googleing it would not show up any results. The following section describes how the algorithm works. zBeWalgo itself is a container compression algorithm, which can execute multiple different compression and transformation algorithms after each other. The execution of different compression algorithms after each other will be called 'combination' in this description and in the code. Additionally to be able to execute combinations of algorithms, zBeWalgo can try different combinations on the same input. This allows high compression ratios on completely different datasets, which would otherwise require its own algorithm each. Executing all known combinations on each input page would be very slow. Therefore the data is compressed at first with that combination, which was already successful on the last input page. If the compressed data size of the current page is similar to that of the last page, the compressed data is returned immediately without even trying the other combinations. Even if there is no guarantee that consecutive calls to the algorithm belong to each other, the speed improvement is obvious. ZRAM uses zsmalloc for the management of the compressed pages. The largest size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than that threshold, ZRAM ignores the compression and writes the uncompressed page instead. As a consequence it is useless to continue compression, if the algorithm detects, that the data can not be compressed using the current combination. The threshold for aborting compression can be changed via sysfs at any time, even if the algorithm is currently in use. If a combination fails to compress the data, zBeWalgo tries the next combination. If no combination is able to reduce the data in size, zBeWalgo returns a negative value. Each combination consists of up to 7 compression and transformation steps. Combinations can be added and removed at any time via sysfs. Already compressed Data can always be decompressed, even if the combination used to produce it does not exist anymore. Technically the user could add up to 256 combinations concurrently, but that would be very time consuming if the data can not be compressed. To be able to build combinations and call different algorithms, all those algorithms are implementing the same interface. This enables the user to specify additional combinations while ZRAM is running. Within the combinations many different algorithms can be used. Some of those algorithms are published. This patch adds the following algorithms to be used within the combinations: - bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and 'D. J. Wheeler' in 1994. This implementation uses counting sort for sorting the data. Their original paper is online available at: http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf - mtf: The Move-To-Front algorithm as described by 'M. Burrows' and 'D. J. Wheeler' in the same paper as bwt. - jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012. https://arxiv.org/pdf/1209.1045.pdf - jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive Bytes can increase the compression ratio, if for example the first 4 Bits of each Byte are zero. If jbe2 is called after mtf, this happens ofthen. - rle: Run Length Encoding - huffman: Huffman encoding - bewalgo: I invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- include/linux/zbewalgo.h | 50 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 118 lib/zbewalgo/BWT.h| 21 ++ lib/zbewalgo/JBE.c| 204 + lib/zbewalgo/JBE.h| 13 + lib/zbewalgo/JBE2.c | 221 ++ lib/zbewalgo/JBE2.h | 13 + lib/zbewalgo/MTF.c| 122 lib/zbewalgo/MTF.h| 13 + lib/zbewalgo/Makefile
[RFC PATCH v5 5/5] crypto: add flag for unstable encoding
The data-format of zBeWalgo, and some other algorithms is unstable. To identify such unstable algorithms this patch adds a new flag to the crypto-api. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/zbewalgo.c | 2 +- include/linux/crypto.h | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c index 9db0d43be..e57b5ced5 100644 --- a/crypto/zbewalgo.c +++ b/crypto/zbewalgo.c @@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct crypto_tfm *tfm, static struct crypto_alg crypto_alg_zbewalgo = { .cra_name = "zbewalgo", - .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING, .cra_ctxsize = sizeof(struct zbewalgo_ctx), .cra_module = THIS_MODULE, .cra_init = zbewalgo_init, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 63420dac0..372893569 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,12 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY0x4000 +/* + * Set if the algorithm is new and it is likely that the encoding may + * change in near future + */ +#define CRYPTO_ALG_UNSTABLE_ENCODING 0x8000 + /* * Transform masks and values (for crt_flags). */ -- 2.14.1
Re: [PATCH 1/5 v4] add compression algorithm zBeWalgo
Hi Philippe, > Actually to be consistent if you want to use GPL-2-0 (and not "or > later") you should use: > > 1. at the top, for a c. file: > // SPDX-License-Identifier: GPL-2.0 > > or for a .h file: > /* SPDX-License-Identifier: GPL-2.0 */ > > The doc explains it all. Including the comment style (a topic that has > been discussed also on list quite bit: Linus had the final word there) > > 2. and in your MODULE_LICENSE macro: > > MODULE_LICENSE("GPL v2"); > > because a MODULE_LICENSE("GPL"); would mean GPL-2.0+ (e.g. or any > later version) and this would not match your top level license tag. Thanks, I have done this correctly in my files, but accidentally written it wrong in my mail. Cordinally Benjamin Warnke
Re: [PATCH 1/5 v4] add compression algorithm zBeWalgo
Hi Philippe, > Am 20.03.2018 um 17:30 schrieb Philippe Ombredanne <pombreda...@nexb.com>: > > Hi Benjamin, > > On Tue, Mar 20, 2018 at 7:04 AM, Benjamin Warnke > <4bwar...@informatik.uni-hamburg.de> wrote: >> zBeWalgo is a completely new algorithm - Currently it is not published >> somewhere else right now, googleing it would not show up any results. The >> following section describes how the algorithm works. > > > >> diff --git a/lib/zbewalgo/zbewalgo.c b/lib/zbewalgo/zbewalgo.c >> new file mode 100644 >> index 0..ef922bc27 >> --- /dev/null >> +++ b/lib/zbewalgo/zbewalgo.c >> @@ -0,0 +1,723 @@ >> +/* >> + * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License version 2 as published >> by >> + * the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but >> WITHOUT >> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + * more details. >> + * >> + * You should have received a copy of the GNU General Public License along >> with >> + * this program. >> + * > > Would you mind using SPDX ids [1] instead of this fine boilerplate > here and throughout your patches? Ok, I will use /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> ... at the top of my files instead of that boilerplate text. And MODULE_LICENSE("GPL"); at the bottom of the module-files. > > >> +MODULE_LICENSE("GPL"); >> +MODULE_DESCRIPTION("zBeWalgo Compression Algorithm"); > > Here your MODULE_LICENSE does not match your top level license. See > module.h [2] for a description of values: GPL would mean "GNU Public > License v2 or later" whereas your top level license (best expressed > with SPDX) would mean GPL-2.0 and no other version. To avoid > confusion, you would need to state the same thing in the > MODULE_LICENSE and your SPDX tags. I used the file "crypto/lz4.c" - since it is a compression algorithm too - as an example of how to format the licensing text. Unfortunately there is the same 'error'. I fixed this error in all of my files in all patches. Cordially Benjamin Warnke
[PATCH 4/5 v4] crypto: configurable compression level
Most compression algorithms published by the crypto api are supporting multiple different compression levels. The crypto api currently just calls these algorithms with their default compression level. This patch enables the caller to specify the compression level. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/api.c | 76 +++ crypto/deflate.c | 16 + crypto/lz4.c | 16 + crypto/lz4hc.c| 13 +--- crypto/testmgr.c | 2 +- drivers/block/zram/zcomp.c| 10 +++--- drivers/block/zram/zcomp.h| 3 +- drivers/block/zram/zram_drv.c | 24 -- drivers/block/zram/zram_drv.h | 1 + fs/ubifs/compress.c | 2 +- include/linux/crypto.h| 9 +++-- mm/zswap.c| 2 +- net/xfrm/xfrm_ipcomp.c| 3 +- 13 files changed, 146 insertions(+), 31 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 70a894e52..dadd4dede 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); +struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg, + u32 type, u32 mask, int level) +{ + struct crypto_tfm *tfm = NULL; + unsigned int tfm_size; + int err = -ENOMEM; + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); + tfm = kzalloc(tfm_size, GFP_KERNEL); + if (!tfm) + goto out_err; + + tfm->__crt_alg = alg; + if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS) + tfm->crt_compress.cot_level = level; + + err = crypto_init_ops(tfm, type, mask); + if (err) + goto out_free_tfm; + + if (!tfm->exit && alg->cra_init) { + err = alg->cra_init(tfm); + if (err) + goto cra_init_failed; + } + + goto out; + +cra_init_failed: + crypto_exit_ops(tfm); +out_free_tfm: + if (err == -EAGAIN) + crypto_shoot_alg(alg); + kfree(tfm); +out_err: + tfm = ERR_PTR(err); +out: + return tfm; +} +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress); + /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_base); +struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type, + u32 mask, int level) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + if (IS_ERR(alg)) { + err = PTR_ERR(alg); + goto err; + } + + tfm = __crypto_alloc_tfm_compress(alg, type, mask, level); + if (!IS_ERR(tfm)) + return tfm; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (fatal_signal_pending(current)) { + err = -EINTR; + break; + } + } + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_base_compress); + void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/deflate.c b/crypto/deflate.c index 4b681a37c..54a2ff21b 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -24,6 +24,7 @@ * it is not needed for IPCOMP and keeps the code simpler. It can be * implemented if someone wants it. */ + #include #include #include @@ -43,7 +44,7 @@ struct deflate_ctx { struct z_stream_s decomp_stream; }; -static int deflate_comp_init(struct deflate_ctx *ctx, int format) +static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level) { int ret = 0; struct z_stream_s *stream = >comp_stream; @@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int format) goto out; } if (format) - ret = zlib_deflateInit(stream, 3); + ret = zlib_deflateInit(stream, level); else - ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, + ret = zlib_deflateInit2(stream, level, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); @@ -109,11 +110,11 @@ static void deflate_decomp_exit(str
[PATCH 5/5 v4] crypto: add flag for unstable encoding
The data-format of zBeWalgo, and some other algorithms is unstable. To identify such unstable algorithms this patch adds a new flag to the crypto-api. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/zbewalgo.c | 2 +- include/linux/crypto.h | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c index c8481b872..9f2b07ac2 100644 --- a/crypto/zbewalgo.c +++ b/crypto/zbewalgo.c @@ -148,7 +148,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct crypto_tfm *tfm, static struct crypto_alg crypto_alg_zbewalgo = { .cra_name = "zbewalgo", - .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING, .cra_ctxsize = sizeof(struct zbewalgo_ctx), .cra_module = THIS_MODULE, .cra_init = zbewalgo_init, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 63420dac0..372893569 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,12 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY0x4000 +/* + * Set if the algorithm is new and it is likely that the encoding may + * change in near future + */ +#define CRYPTO_ALG_UNSTABLE_ENCODING 0x8000 + /* * Transform masks and values (for crt_flags). */ -- 2.14.1
[PATCH 3/5 v4] crypto: add unsafe decompression to api
Up to Version 3 of this patch the decompressor of zbewalgo did not verify that there is no overflow in the output buffer. Now zbewalgo includes a safe decompressor which does check for buffer overflows and heap-error. ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version, which is a bit faster, but does no error checking. These unsafe decompressors can be applied when the datasource and the whole datapath is trusted. This patch publishes these existing functions in the crypto-api Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/842.c | 3 ++- crypto/compress.c| 10 ++ crypto/crypto_null.c | 3 ++- crypto/deflate.c | 3 ++- crypto/lz4.c | 23 ++- crypto/lz4hc.c | 23 ++- crypto/lzo.c | 3 ++- crypto/testmgr.c | 27 ++- crypto/zbewalgo.c| 29 - drivers/block/zram/zram_drv.c| 34 +- drivers/block/zram/zram_drv.h| 1 + drivers/crypto/cavium/zip/zip_main.c | 6 -- drivers/crypto/nx/nx-842-powernv.c | 3 ++- drivers/crypto/nx/nx-842-pseries.c | 3 ++- include/linux/crypto.h | 16 15 files changed, 174 insertions(+), 13 deletions(-) diff --git a/crypto/842.c b/crypto/842.c index bc26dc942..7e74ea26b 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -112,7 +112,8 @@ static struct crypto_alg alg = { .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, - .coa_decompress = crypto842_decompress } } + .coa_decompress = crypto842_decompress, + .coa_decompress_unsafe = crypto842_decompress } } }; static struct scomp_alg scomp = { diff --git a/crypto/compress.c b/crypto/compress.c index f2d522924..bec796249 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm, dlen); } +static int crypto_decompress_unsafe(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, +u8 *dst, unsigned int *dlen) +{ + return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src, + slen, dst, + dlen); +} + int crypto_init_compress_ops(struct crypto_tfm *tfm) { struct compress_tfm *ops = >crt_compress; ops->cot_compress = crypto_compress; ops->cot_decompress = crypto_decompress; + ops->cot_decompress_unsafe = crypto_decompress_unsafe; return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c746..6e15e8c0b 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { { .cra_module = THIS_MODULE, .cra_u = { .compress = { .coa_compress = null_compress, - .coa_decompress = null_compress } } + .coa_decompress = null_compress, + .coa_decompress_unsafe = null_compress } } } }; MODULE_ALIAS_CRYPTO("compress_null"); diff --git a/crypto/deflate.c b/crypto/deflate.c index 94ec3b36a..4b681a37c 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -286,7 +286,8 @@ static struct crypto_alg alg = { .cra_exit = deflate_exit, .cra_u = { .compress = { .coa_compress = deflate_compress, - .coa_decompress = deflate_decompress } } + .coa_decompress = deflate_decompress, + .coa_decompress_unsafe = deflate_decompress } } }; static struct scomp_alg scomp[] = { { diff --git a/crypto/lz4.c b/crypto/lz4.c index 2ce2660d3..60a1914b7 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, return 0; } +static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + int out_len = LZ4_decompress_fast(src, dst, *dlen); + + if (out_len < 0) + return -EINVAL; + + *dlen = out_len; + return 0; +} + static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) @@ -117,6 +130,13 @@ st
[PATCH 0/5 v4] add compression algorithm zBeWalgo
edu/vg/isabeldata/readme.html zBeWalgo is the only algorithm which can compress this dataset with a noticeable compressionratio. Algorithmratiowrite read 842 1.0060.09 1956.26 --hdd-- 1.00 134.70 156.62 lz4hc_01 1.00 154.81 1839.37 lz4hc*_01 1.00 154.81 2105.53 lz4hc_10 1.00 157.33 2078.69 lz4hc*_10 1.00 157.33 2113.14 lz4hc_09 1.00 158.50 2018.51 lz4hc*_09 1.00 158.50 2093.65 lz4hc*_02 1.00 159.54 2104.91 lz4hc_02 1.00 159.54 2117.34 lz4hc_03 1.00 161.26 2070.76 lz4hc*_03 1.00 161.26 2107.27 lz4hc*_08 1.00 161.34 2100.74 lz4hc_08 1.00 161.34 2105.26 lz4hc*_04 1.00 161.95 2080.96 lz4hc_04 1.00 161.95 2104.00 lz4hc_05 1.00 162.17 2044.43 lz4hc*_05 1.00 162.17 2101.74 lz4hc*_06 1.00 163.61 2087.19 lz4hc_06 1.00 163.61 2104.61 lz4hc_07 1.00 164.51 2094.78 lz4hc*_07 1.00 164.51 2105.53 lz4_011.00 1134.89 2109.70 lz4*_01 1.00 1134.89 2118.71 lz4*_08 1.00 1141.96 2104.87 lz4_081.00 1141.96 2118.97 lz4_091.00 1145.55 2087.76 lz4*_09 1.00 1145.55 2118.85 lz4_021.00 1157.28 2094.33 lz4*_02 1.00 1157.28 2124.67 lz4*_03 1.00 1194.18 2106.36 lz4_031.00 1194.18 2119.89 lz4_041.00 1195.09 2117.03 lz4*_04 1.00 1195.09 2120.23 lz4*_05 1.00 1225.56 2109.04 lz4_051.00 1225.56 2120.52 lz4*_06 1.00 1261.67 2109.14 lz4_061.00 1261.67 2121.13 lz4*_07 1.00 1270.86 1844.63 lz4_071.00 1270.86 2041.08 lz4_101.00 1305.36 2109.22 lz4*_10 1.00 1305.36 2120.65 lzo 1.00 1338.61 2109.66 zstd_17 1.0313.93 1138.94 zstd_18 1.0314.01 1170.78 zstd_16 1.0327.12 1073.75 zstd_15 1.0343.52 1061.97 zstd_14 1.0349.60 1082.98 zstd_12 1.0355.03 1042.43 zstd_13 1.0355.14 1173.50 zstd_11 1.0355.24 1178.05 zstd_10 1.0370.01 1173.05 zstd_07 1.03 118.10 1041.92 zstd_06 1.03 123.00 1171.59 zstd_05 1.03 124.61 1165.74 zstd_01 1.03 166.80 1005.29 zstd_04 1.03 170.25 1127.75 zstd_03 1.03 171.40 1172.34 zstd_02 1.03 174.08 1017.34 zstd_09 1.03 195.30 1176.82 zstd_08 1.03 195.98 1175.09 deflate_9 1.0530.15 483.55 deflate_8 1.0530.45 466.67 deflate_5 1.0531.25 480.92 deflate_4 1.0531.84 472.81 deflate_7 1.0531.84 484.18 deflate_6 1.0531.94 481.37 deflate_2 1.0533.07 484.09 deflate_3 1.0533.11 463.57 deflate_1 1.0533.19 469.71 zstd_22 1.06 8.89 647.75 zstd_21 1.0610.70 700.11 zstd_20 1.0610.80 723.42 zstd_19 1.0612.41 764.24 zbewalgo* 1.51 146.45 581.43 zbewalgo 1.51 146.45 592.86 zbewalgo'*1.5438.14 120.96 zbewalgo' 1.5438.14 125.81 Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- -- 2.14.1
[PATCH 2/5 v4] crypto: add zBeWalgo to crypto-api
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by zram. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig| 12 +++ crypto/Makefile | 1 + crypto/testmgr.c | 10 +++ crypto/testmgr.h | 134 crypto/zbewalgo.c | 177 ++ drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- 7 files changed, 340 insertions(+), 1 deletion(-) create mode 100644 crypto/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b75264b09..3ac0d4ca7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1668,6 +1668,18 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "zBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the zBeWalgo compression algorithm. This algorithm + accepts only input sizes of at most one page at once. + To achieve high compression ratios zbewalgo can call multiple + transformation and compression algorithms in a row to optimize + the compressed size. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d5e23a142..294075476 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_xts_dec_tv_template) } } + }, { + .alg = "zbewalgo", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zbewalgo_comp_tv_template), + .decomp = __VECS(zbewalgo_decomp_tv_template) + } + } }, { .alg = "zlib-deflate", .test = alg_test_comp, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6044f6906..996d8321e 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -35133,6 +35133,140 @@ static const struct hash_testvec bfin_crc_tv_template[] = { }; +static const struct comp_testvec zbewalgo_comp_tv_template[] = { + { + .inlen = 512, + .outlen = 402, + .input = + "\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d" + "\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d" + "\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d" + "\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d" + "\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d" + "\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d" + "\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d" + "\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d" + "\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d" + "\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d" + "\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d" + "\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d" + "\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d" + "\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d" + "\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d" + "\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d" + "\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d" + "\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d" + "\x76\x7f\
[PATCH 1/5 v4] add compression algorithm zBeWalgo
zBeWalgo is a completely new algorithm - Currently it is not published somewhere else right now, googleing it would not show up any results. The following section describes how the algorithm works. zBeWalgo itself is a container compression algorithm, which can execute multiple different compression and transformation algorithms after each other. The execution of different compression algorithms after each other will be called 'combination' in this description and in the code. Additionally to be able to execute combinations of algorithms, zBeWalgo can try different combinations on the same input. This allows high compression ratios on completely different datasets, which would otherwise require its own algorithm each. Executing all known combinations on each input page would be very slow. Therefore the data is compressed at first with that combination, which was already successful on the last input page. If the compressed data size of the current page is similar to that of the last page, the compressed data is returned immediately without even trying the other combinations. Even if there is no guarantee that consecutive calls to the algorithm belong to each other, the speed improvement is obvious. ZRAM uses zsmalloc for the management of the compressed pages. The largest size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than that threshold, ZRAM ignores the compression and writes the uncompressed page instead. As a consequence it is useless to continue compression, if the algorithm detects, that the data can not be compressed using the current combination. The threshold for aborting compression can be changed via sysfs at any time, even if the algorithm is currently in use. If a combination fails to compress the data, zBeWalgo tries the next combination. If no combination is able to reduce the data in size, zBeWalgo returns a negative value. Each combination consists of up to 7 compression and transformation steps. Combinations can be added and removed at any time via sysfs. Already compressed Data can always be decompressed, even if the combination used to produce it does not exist anymore. Technically the user could add up to 256 combinations concurrently, but that would be very time consuming if the data can not be compressed. To be able to build combinations and call different algorithms, all those algorithms are implementing the same interface. This enables the user to specify additional combinations while ZRAM is running. Within the combinations many different algorithms can be used. Some of those algorithms are published. This patch adds the following algorithms to be used within the combinations: - bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and 'D. J. Wheeler' in 1994. This implementation uses counting sort for sorting the data. Their original paper is online available at: http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf - mtf: The Move-To-Front algorithm as described by 'M. Burrows' and 'D. J. Wheeler' in the same paper as bwt. - jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012. https://arxiv.org/pdf/1209.1045.pdf - jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive Bytes can increase the compression ratio, if for example the first 4 Bits of each Byte are zero. If jbe2 is called after mtf, this happens ofthen. - rle: Run Length Encoding - huffman: Huffman encoding - bewalgo: I invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- include/linux/zbewalgo.h | 62 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 129 + lib/zbewalgo/BWT.h| 33 +++ lib/zbewalgo/JBE.c| 215 ++ lib/zbewalgo/JBE.h| 25 ++ lib/zbewalgo/JBE2.c | 232 +++ lib/zbewalgo/JBE2.h | 25 ++ lib/zbewalgo/MTF.c| 133 + lib/zbewalgo/MTF.h| 25 ++ lib/zbewalgo/Ma
Re: [RESEND PATCH v3] crypto: add zBeWalgo compression for zram
Hi Eric, On 06.03.2018 at 23:13, Eric Biggers wrote: > > Hi Benjamin, > > On Tue, Mar 06, 2018 at 09:23:08PM +0100, Benjamin Warnke wrote: >> Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM >> compresses each page individually. As a result the compression algorithm is >> forced to use a very small sliding window. None of the available compression >> algorithms is designed to achieve high compression ratios with small inputs. >> >> This patch adds a new compression algorithm 'zBeWalgo' to the crypto api. >> This >> algorithm focusses on increasing the capacity of the compressed block-device >> created by ZRAM. The choice of compression algorithms is always a tradeoff >> between speed and compression ratio. >> > [...] >> >> Zstd is not in the list of compared algorithms, because it is not available >> for Zram in the currently available kernel trees. >> > > This still isn't a valid excuse for not comparing it to Zstandard. Zstandard > is > already in the kernel in lib/. It would only need glue code to wire it up as > an > scomp_alg to the crypto API. I'll add benchmarks with Zstandard. > In contrast you're proposing an entirely new > algorithm. Ultimately, by proposing a new algorithm you're on the hook to > demonstrate that existing algorithms aren't good enough. Note that many of > the > existing algorithms including Zstandard also have multiple compression levels > available to choose from. The crypto api exposes each algorithm only once with its default compression level. Currently there is no switch/option/mechanism/code or something to switch thoose levels. Of course I could benchmark the algorithms with multiple compression levels. How many / Which compression levels would you like to see to be compared with each other? > It's also rare to add a compression or encryption algorithm to the Linux > kernel > that isn't already used somewhere else. The kernel is the only place where so many small pieces of data need to be compressed. In user space nobody cares that the data is splitted into pages, and therefore compression usually applied to large datasets. > Have you at least written a formal > specification of the 'zBeWalgo' data format? Not yet. > Zstandard, for example, had a > well-tested and optimized userspace library and a specification of the data > format before it was added to the kernel. And even before that it took a > couple > years of development to stabilize the Zstandard data format. > > Now, it's true that you don't strictly need a stable data format if the > algorithm will only ever be used for RAM compression. But, if you want to > take > that shortcut, then you're on the hook to justify it, and perhaps to enlighten > the crypto API about algorithms that don't have a stable data format (e.g. > using > a new CRYPTO_ALG_* flag) so that users have to more explicitly opt-in to using > them. I could add CRYPTO_ALG_TYPE_COMPRESS_UNSTABLE as an alias for CRYPTO_ALG_TYPE_COMPRESS > You cannot just ignore fuzz-safety in the decompressor either. The existing > decompressors exposed through the crypto API are fuzz-safe, so it's not valid > to > compare an unsafe decompressor to them. If you really do want to add an > unsafe > decompressor then you'd likely need to look into adding crypto API support for > users to request unsafe decompression -- which could also be used to expose > the > unsafe version of the LZ4 decompressor (LZ4_decompress_fast() instead of > LZ4_decompress_safe()). Or if you do make the decompressor safe, then of > course > you'd actually need to fuzz it; I suggest porting the code to userspace and > running american fuzzy lop (afl-fuzz) on it: http://lcamtuf.coredump.cx/afl/ The current version of the decompressor is fuzz-safe. I tested it with lots of data and made sure, that each possible branch in the code is executed multiple times in different combinations. > Separately, given that this is a brand new algorithm and it seems to have > various corner cases, it would be a good idea to demonstrate a higher > guarantee > of the correctness of the compression-decompression round trip. afl-fuzz can > be > good for that too: you could port the code to userspace and write a simple > program which compresses and decompresses a file and aborts if it was > corrupted. > Then by passing the program to afl-fuzz it will eventually cover most of the > compressor and decompressor. I've done that when testing compression code in > the past. I will port my code to user space and use afl-fuzz to test it. Benjamin
Re: [RESEND PATCH v3] crypto: add zBeWalgo compression for zram
Hello, On(07/03/2018 03:12),Sergey Senozhatsky wrote: > > Hello, > > On (03/06/18 20:59), Benjamin Warnke wrote: >> Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM >> compresses each page individually. As a result the compression algorithm >> is >> forced to use a very small sliding window. None of the available >> compression >> algorithms is designed to achieve high compression ratios with small >> inputs. > > I think you first need to merge zBeWalgo (looks like a long way to go) > And then add ZRAM support, as a separate patch. I'll split my patch into 2 parts 1st: add zBeWalgo compression algorithm 2nd: enable zBeWalgo to be used by ZRAM > >> - 'ecoham' (100 MiB) This dataset is one of the input files for the >> scientific >> application ECOHAM which runs an ocean simulation. This dataset contains a >> lot of zeros. Where the data is not zero there are arrays of floating >> point >> values, adjacent float values are likely to be similar to each other, >> allowing for high compression ratios. >> >> algorithm | ratio | compression| decompression >> zbewalgo | 12.94 | 294.10 MBit/s | 1242.59 MBit/s >> deflate | 12.54 | 75.51 MBit/s | 736.39 MBit/s >> 842 | 12.26 | 182.59 MBit/s | 683.61 MBit/s >> lz4hc | 12.00 | 51.23 MBit/s | 1524.73 MBit/s >> lz4 | 10.68 | 1334.37 MBit/s | 1603.54 MBit/s >> lzo |9.79 | 1333.76 MBit/s | 1534.63 MBit/s >> >> - 'source-code' (800 MiB) This dataset is a tarball of the source-code >> from a >> linux-kernel. >> >> algorithm | ratio | compression| decompression >> deflate |3.27 | 42.48 MBit/s | 250.36 MBit/s >> lz4hc |2.40 | 104.14 MBit/s | 1150.53 MBit/s >> lzo |2.27 | 444.77 MBit/s | 886.97 MBit/s >> lz4 |2.18 | 453.08 MBit/s | 1101.45 MBit/s >> 842 |1.65 | 64.10 MBit/s | 158.40 MBit/s >> zbewalgo |1.19 | 52.89 MBit/s | 197.58 MBit/s >> >> - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the >> running hpcg-benchmark. At the time of the snapshot, that application >> performed a sparse matrix - vector multiplication. >> >> algorithm | ratio | compression| decompression >> zbewalgo | 16.16 | 179.97 MBit/s | 468.36 MBit/s >> deflate |9.52 | 65.11 MBit/s | 632.69 MBit/s >> lz4hc |4.96 | 193.33 MBit/s | 1607.12 MBit/s >> 842 |4.20 | 150.99 MBit/s | 316.22 MBit/s >> lzo |4.14 | 922.74 MBit/s | 865.32 MBit/s >> lz4 |3.79 | 908.39 MBit/s | 1375.33 MBit/s >> >> - 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar, >> but >> not equal. This array is produced by a partial differential equation >> solver >> using a Jakobi-implementation. >> >> algorithm | ratio | compression| decompression >> zbewalgo |1.30 | 203.30 MBit/s | 530.87 MBit/s >> deflate |1.02 | 37.06 MBit/s | 1131.88 MBit/s >> lzo |1.00 | 1741.46 MBit/s | 2012.78 MBit/s >> lz4 |1.00 | 1458.08 MBit/s | 2013.88 MBit/s >> lz4hc |1.00 | 173.19 MBit/s | 2012.37 MBit/s >> 842 |1.00 | 64.10 MBit/s | 2013.64 MBit/s > > Hm, mixed feelings. as Eric Biggers suggested, I'll add Zstandard to the set of algorithms which compared. What else should I add to the benchmarks? Benjamin
[RESEND PATCH v3] crypto: add zBeWalgo compression for zram
invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Benchmarks: To obtain reproducable benchmarks, the datasets were first loaded into a userspace-program. Than the data is written directly to a clean zram-partition without any filesystem. Between writing and reading 'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are wall clock times, and the benchmarks are using only one cpu-core at a time. The new algorithm is compared to all available compression algorithms from the crypto-api. Zstd is not in the list of compared algorithms, because it is not available for Zram in the currently available kernel trees. - '/dev/zero' (8 GiB) This dataset is used to measure the speed limitations for ZRAM. ZRAM filters zero-data internally and does not even call the specified compression algorithm. algorithm | compression| decompression --zram-- | 2724.08 MBit/s | 2828.87 MBit/s - 'ecoham' (100 MiB) This dataset is one of the input files for the scientific application ECOHAM which runs an ocean simulation. This dataset contains a lot of zeros. Where the data is not zero there are arrays of floating point values, adjacent float values are likely to be similar to each other, allowing for high compression ratios. algorithm | ratio | compression| decompression zbewalgo | 12.94 | 294.10 MBit/s | 1242.59 MBit/s deflate | 12.54 | 75.51 MBit/s | 736.39 MBit/s 842 | 12.26 | 182.59 MBit/s | 683.61 MBit/s lz4hc | 12.00 | 51.23 MBit/s | 1524.73 MBit/s lz4 | 10.68 | 1334.37 MBit/s | 1603.54 MBit/s lzo |9.79 | 1333.76 MBit/s | 1534.63 MBit/s - 'source-code' (800 MiB) This dataset is a tarball of the source-code from a linux-kernel. algorithm | ratio | compression| decompression deflate |3.27 | 42.48 MBit/s | 250.36 MBit/s lz4hc |2.40 | 104.14 MBit/s | 1150.53 MBit/s lzo |2.27 | 444.77 MBit/s | 886.97 MBit/s lz4 |2.18 | 453.08 MBit/s | 1101.45 MBit/s 842 |1.65 | 64.10 MBit/s | 158.40 MBit/s zbewalgo |1.19 | 52.89 MBit/s | 197.58 MBit/s - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the running hpcg-benchmark. At the time of the snapshot, that application performed a sparse matrix - vector multiplication. algorithm | ratio | compression| decompression zbewalgo | 16.16 | 179.97 MBit/s | 468.36 MBit/s deflate |9.52 | 65.11 MBit/s | 632.69 MBit/s lz4hc |4.96 | 193.33 MBit/s | 1607.12 MBit/s 842 |4.20 | 150.99 MBit/s | 316.22 MBit/s lzo |4.14 | 922.74 MBit/s | 865.32 MBit/s lz4 |3.79 | 908.39 MBit/s | 1375.33 MBit/s - 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar, but not equal. This array is produced by a partial differential equation solver using a Jakobi-implementation. algorithm | ratio | compression| decompression zbewalgo |1.30 | 203.30 MBit/s | 530.87 MBit/s deflate |1.02 | 37.06 MBit/s | 1131.88 MBit/s lzo |1.00 | 1741.46 MBit/s | 2012.78 MBit/s lz4 |1.00 | 1458.08 MBit/s | 2013.88 MBit/s lz4hc |1.00 | 173.19 MBit/s | 2012.37 MBit/s 842 |1.00 | 64.10 MBit/s | 2013.64 MBit/s Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> Changes since v2: - added linux-kernel Mailinglist Changes since v1: - improved documentation - improved code style - replaced numerous casts with get_unaligned* - added tests in crypto/testmgr.h/c - added zBeWalgo to the list of algorithms shown by /sys/block/zram0/comp_algorithm --- crypto/Kconfig| 8 + crypto/Makefile | 1 + crypto/testmgr.c | 10 + crypto/testmgr.h | 134 + crypto/zbewalgo.c | 176 drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- include/linux/zbewalgo.h | 53 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 112 lib
[PATCH v2] crypto: add zBeWalgo compression for zram
encoding - bewalgo: I invented this algorithm for my bachelors thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read an write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is, that the indices can be encoded in less then 8 Bytes each. Benchmarks: To obtain reproducable benchmarks, the datasets were first loaded into a userspace-program. Than the data is written directly to a clean zram-partition without any filesystem. Between writing and reading 'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are wall clock times, and the benchmarks are using only one cpu-core at a time. The new algorithm is compared to all available compression algorithms from the crypto-api. Zstd is not in the list of compared algorithms, because it is not available for Zram in the currently available kernel trees. - '/dev/zero' (8 GiB) This dataset is used to measure the speed limitations for ZRAM. ZRAM filters zero-data internally and does not even call the specified compression algorithm. algorithm | compression| decompression --zram-- | 2724.08 MBit/s | 2828.87 MBit/s - 'ecoham' (100 MiB) This dataset is one of the input files for the scientific application ECOHAM which runs an ocean simulation. This dataset contains a lot of zeros. Where the data is not zero there are arrays of floating point values, adjacent float values are likely to be similar to each other, allowing for high compression ratios. algorithm | ratio | compression| decompression zbewalgo | 12.94 | 294.10 MBit/s | 1242.59 MBit/s deflate | 12.54 | 75.51 MBit/s | 736.39 MBit/s 842 | 12.26 | 182.59 MBit/s | 683.61 MBit/s lz4hc | 12.00 | 51.23 MBit/s | 1524.73 MBit/s lz4 | 10.68 | 1334.37 MBit/s | 1603.54 MBit/s lzo |9.79 | 1333.76 MBit/s | 1534.63 MBit/s - 'source-code' (800 MiB) This dataset is a tarball of the source-code from a linux-kernel. algorithm | ratio | compression| decompression deflate |3.27 | 42.48 MBit/s | 250.36 MBit/s lz4hc |2.40 | 104.14 MBit/s | 1150.53 MBit/s lzo |2.27 | 444.77 MBit/s | 886.97 MBit/s lz4 |2.18 | 453.08 MBit/s | 1101.45 MBit/s 842 |1.65 | 64.10 MBit/s | 158.40 MBit/s zbewalgo |1.19 | 52.89 MBit/s | 197.58 MBit/s - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the running hpcg-benchmark. At the time of the snapshot, that application performed a sparse matrix - vector multiplication. algorithm | ratio | compression| decompression zbewalgo | 16.16 | 179.97 MBit/s | 468.36 MBit/s deflate |9.52 | 65.11 MBit/s | 632.69 MBit/s lz4hc |4.96 | 193.33 MBit/s | 1607.12 MBit/s 842 |4.20 | 150.99 MBit/s | 316.22 MBit/s lzo |4.14 | 922.74 MBit/s | 865.32 MBit/s lz4 |3.79 | 908.39 MBit/s | 1375.33 MBit/s - 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar, but not equal. This array is produced by a partial differential equation solver using a Jakobi-implementation. algorithm | ratio | compression| decompression zbewalgo |1.30 | 203.30 MBit/s | 530.87 MBit/s deflate |1.02 | 37.06 MBit/s | 1131.88 MBit/s lzo |1.00 | 1741.46 MBit/s | 2012.78 MBit/s lz4 |1.00 | 1458.08 MBit/s | 2013.88 MBit/s lz4hc |1.00 | 173.19 MBit/s | 2012.37 MBit/s 842 |1.00 | 64.10 MBit/s | 2013.64 MBit/s Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> Changes since v1: - improved documentation - improved code style - replaced numerous casts with get_unaligned* - added tests in crypto/testmgr.h/c - added zBeWalgo to the list of algorithms shown by /sys/block/zram0/comp_algorithm --- crypto/Kconfig| 8 + crypto/Makefile | 1 + crypto/testmgr.c | 10 + crypto/testmgr.h | 134 + crypto/zbewalgo.c | 176 drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- include/linux/zbewalgo.h | 53 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c
Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Hi, I am working on a new Version for this patch addressing all comments, and following all guidelines. Best Regards, Benjamin Warnke
Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Hi, I modified my code as suggested by Stephan and Eric. Moving the code from the header files into *.c source files slowed down the compression and decompression speed by a factor of up to 20. I made no changes to the code itself, that would explain, why it is so much slower. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig| 8 + crypto/Makefile | 1 + crypto/zbewalgo.c | 208 include/linux/zbewalgo.h | 59 + lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 111 + lib/zbewalgo/BWT.h| 34 +++ lib/zbewalgo/JBE.c| 131 ++ lib/zbewalgo/JBE.h| 26 ++ lib/zbewalgo/JBE2.c | 135 +++ lib/zbewalgo/JBE2.h | 26 ++ lib/zbewalgo/MTF.c| 98 lib/zbewalgo/MTF.h| 26 ++ lib/zbewalgo/Makefile | 4 + lib/zbewalgo/RLE.c| 130 ++ lib/zbewalgo/RLE.h| 26 ++ lib/zbewalgo/bewalgo.c| 369 + lib/zbewalgo/bewalgo.h| 26 ++ lib/zbewalgo/bewalgo2.c | 385 ++ lib/zbewalgo/bewalgo2.h | 26 ++ lib/zbewalgo/bitshuffle.c | 101 lib/zbewalgo/bitshuffle.h | 26 ++ lib/zbewalgo/huffman.c| 227 ++ lib/zbewalgo/huffman.h| 26 ++ lib/zbewalgo/include.h| 106 + lib/zbewalgo/zbewalgo.c | 590 ++ 27 files changed, 2909 insertions(+) create mode 100644 crypto/zbewalgo.c create mode 100644 include/linux/zbewalgo.h create mode 100644 lib/zbewalgo/BWT.c create mode 100644 lib/zbewalgo/BWT.h create mode 100644 lib/zbewalgo/JBE.c create mode 100644 lib/zbewalgo/JBE.h create mode 100644 lib/zbewalgo/JBE2.c create mode 100644 lib/zbewalgo/JBE2.h create mode 100644 lib/zbewalgo/MTF.c create mode 100644 lib/zbewalgo/MTF.h create mode 100644 lib/zbewalgo/Makefile create mode 100644 lib/zbewalgo/RLE.c create mode 100644 lib/zbewalgo/RLE.h create mode 100644 lib/zbewalgo/bewalgo.c create mode 100644 lib/zbewalgo/bewalgo.h create mode 100644 lib/zbewalgo/bewalgo2.c create mode 100644 lib/zbewalgo/bewalgo2.h create mode 100644 lib/zbewalgo/bitshuffle.c create mode 100644 lib/zbewalgo/bitshuffle.h create mode 100644 lib/zbewalgo/huffman.c create mode 100644 lib/zbewalgo/huffman.h create mode 100644 lib/zbewalgo/include.h create mode 100644 lib/zbewalgo/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b44c0ae04..f63f543e9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1667,6 +1667,14 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "ZBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the ZBeWalgo algorithm. + config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c new file mode 100644 index 0..3d12f9cdf --- /dev/null +++ b/crypto/zbewalgo.c @@ -0,0 +1,208 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include "linux/zbewalgo.h" + + +struct zbewalgo_ctx { + void *zbewalgo_comp_mem; +}; + +static void *zbewalgo_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = vmalloc(zbewalgo_get_wrkmem_size()); + if (!ctx) + return ERR_PTR(-ENOMEM); + return ctx; +} + +static int zbewalgo_init(struct crypto_tfm *tfm) +{ + struct zbewalgo_ctx *ctx = crypto_tfm_
Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Hi Stephan, >>> In general: I do not think that having larger C functions in header files >>> is a proper coding style. >> >> How should I solve this? >> >> Option 1: >> Move everything in the lib/zbewalgo folder into a single source file. >> This way there is no function defined in a header file. >> I separated the code into different files because the different partial >> compression algorithms are independent from each other. >> >> Option 2: >> Move each header file in its own *.c file, while keeping the >> function-declarations in the header. If the code is scattered in multiple >> source files each of the partial algorithms would show up as an independent >> module. All these modules would load simultaneously with the module >> zbewalgo_compress The module zbewalgo_compress requires an array of all >> partial algorithms. This would spam the 'lsmod' list with unneccessary >> details. > > A module may be compiled from multiple C files. So, moving the code into > separate C files and link them into one object / KO should be considered. I will start moving the code into multiple C files by tomorrow. >>> Why are there __attribute__ ((unused)) function parameters, such as in >>> compress_bitshuffle and others? >> >> The zbewalgo algorithm is a container-algorithm for compression functions >> with the ability to concatenate multiple algorithms. To be able to call any >> compression algorithm the same way, I defined 'struct zbewalgo_alg' as the >> interface for all those independent compression algorithms. Some of the >> partial algorithms do not require all parameters. To silence compiler >> warnings (if additional warning flags are enabled) I explicitly add the >> 'unused'-parameter > > Linux does not enable the compiler warning about unused parameters. This is my first patch request. I read somewhere, that the code should be compiled with additional warnings enabled before submission, to ensure, that the patch does not include useless code. I will remove that attribute. Benjamin
Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Hi Eric, >> Currently ZRAM uses the compression-algorithms from the crypto-api. >> None of the current compression-algorithms in the crypto-api is designed >> to compress 4KiB chunks of data efficiently. >> This patch adds a new compression-algorithm especially designed for ZRAM, >> to compress small pieces of data more efficiently. > > This is some interesting work, and I like the idea of doing transforms > specialized for in-memory data. However, where can I find more information > about this new compression algorithm? I invented this algorithm during the last months for an university lecture. I have not published it before, that is why you can not find anything at google. > What does "zbewalgo" even stand for / > mean? Googling it turns up nothing. zbewalgo: z=zram be=Benjamin wa=Warnke algo=Algorithm > You are going to have to be much more specific what you mean by "efficiently". > Efficiency usually implies speed, yet even by your own numbers LZ4 is much > faster than "zbewalgo", both for compression and decompression. In this context efficient refers to the compression ratio. > If the goal is to provide an algorithm more tuned for compression ratio than > speed in comparison to LZ4, then the omission of Zstandard from your > benchmarks > is strange, especially given that Zstandard is available in the kernel now. I was not aware of Zstandard in the kernel. #echo zstandard > /sys/block/zram0/comp_algorithm returns "write error: Invalid argument" #cat /sys/block/zram0/comp_algorithm does not show Zstandard I pulled the latest commit from git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git There is no CONFIG_CRYPTO_ZSTD (or similar) flag in my .config file. How can I enable Zstandard for use within zram? > The proposed "zbewalgo" decompressor also doesn't handle invalid inputs, which > means it cannot be used on untrusted data. > This isn't acceptable without > justification (since people may use it on untrusted data, creating security > vulnerabilities), and it also makes it not really a fair comparison with LZ4 > since the LZ4 decompressor does handle invalid inputs, at least in the mode > exposed through the crypto API. Since zbewalgo is intended for use within zram, the data-source for the decompression algorithm is the same computer. I assume, that the data is never corrupted in any way, because it is never exposed to untrusted hardware. To catch all invalid inputs a lot of branches would be required. Each additional if-branch in the code for handling untrusted data slows down the algorithm. In the zram context only blocks of 4KiB are going to be compressed. The zbewalgo algorithm executes multiple (de)compression steps in a row. Each step would need to fully verify, that the data is valid, even if the preceding algorithm does not detect an error. All together, the relative cpu-time cost for invalid data is high. I could add a comment to the decompression functions, but that would not be visible for the final user. Benjamin
Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Hi Stephan, thanks for your fast response. > Please run checkpatch.pl on the patch and fix the formatting issues. I've run checkpatch.pl again and fixed all errors and warnings except the warnings about printk. Compression does not have it's own subsystem, that is why I used printk(KERN_INFO ... > In general: I do not think that having larger C functions in header files is > a > proper coding style. How should I solve this? Option 1: Move everything in the lib/zbewalgo folder into a single source file. This way there is no function defined in a header file. I separated the code into different files because the different partial compression algorithms are independent from each other. Option 2: Move each header file in its own *.c file, while keeping the function-declarations in the header. If the code is scattered in multiple source files each of the partial algorithms would show up as an independent module. All these modules would load simultaneously with the module zbewalgo_compress The module zbewalgo_compress requires an array of all partial algorithms. This would spam the 'lsmod' list with unneccessary details. I would prefer option 1, since it adds less overhead. > Also, having static variables header files is also not > nice. I will remove the static modifier for variables in the header files > Do not redefine code that already exists. For example, MIN/MAX exists: min_t > and max_t. Ok, I will use min_t and max_t > Why are there __attribute__ ((unused)) function parameters, such as in > compress_bitshuffle and others? The zbewalgo algorithm is a container-algorithm for compression functions with the ability to concatenate multiple algorithms. To be able to call any compression algorithm the same way, I defined 'struct zbewalgo_alg' as the interface for all those independent compression algorithms. Some of the partial algorithms do not require all parameters. To silence compiler warnings (if additional warning flags are enabled) I explicitly add the 'unused'-parameter Best regards, Benjamin
Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram
Currently ZRAM uses the compression-algorithms from the crypto-api. None of the current compression-algorithms in the crypto-api is designed to compress 4KiB chunks of data efficiently. This patch adds a new compression-algorithm especially designed for ZRAM, to compress small pieces of data more efficiently. Benchmarks: To obtain reproduceable benchmarks, the datasets were first loaded into a userspace-program. Than the data is written directly to a clean zram-partition without any filesystem. Between writing and reading 'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are wall clock times, and the benchmarks are using only one cpu-core at a time. The new algorithm is compared to all available compression algorithms from the crypto-api. - '/dev/zero' (8 GiB) This dataset is used to measure the speed limitations for ZRAM. ZRAM filters zero-data internally and does not even call the specified compression algorithm. algorithm | compression | decompression --zram-- | 2913.92 MiB/s | 3116.38 MiB/s - 'ecoham' (100 MiB) This dataset is one of the input files for the scientific application ECOHAM which runs an ocean simulation. This dataset contains a lot of zeros. Where the data is not zero there are arrays of floating point values, neighboring float values are likely to be similar to each other, allowing high compression ratios. algorithm | ratio | compression | decompression zbewalgo | 12.77 | 417.35 MiB/s | 1306.21 MiB/s deflate | 12.54 | 73.51 MiB/s | 788.85 MiB/s 842 | 12.26 | 199.22 MiB/s | 675.74 MiB/s lz4hc | 12.00 | 49.99 MiB/s | 1787.79 MiB/s lz4 | 10.68 | 1356.77 MiB/s | 1690.34 MiB/s lzo | 9.79 | 1363.05 MiB/s | 1640.70 MiB/s - 'source-code' (800 MiB) This dataset is a tarball of the source-code from a linux-kernel. algorithm | ratio | compression | decompression deflate | 3.27 | 42.36 MiB/s | 256.22 MiB/s lz4hc | 2.40 | 99.62 MiB/s | 1187.15 MiB/s lzo | 2.27 | 432.95 MiB/s | 944.74 MiB/s lz4 | 2.18 | 444.07 MiB/s | 1159.49 MiB/s zbewalgo | 1.89 | 43.44 MiB/s | 35.73 MiB/s 842 | 1.65 | 62.73 MiB/s | 149.71 MiB/s - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the running hpcg-benchmark. At the time of the snapshot, that application performed a sparse matrix - vector multiplication. algorithm | ratio | compression | decompression zbewalgo | 15.88 | 174.49 MiB/s | 372.03 MiB/s deflate | 9.52 | 64.47 MiB/s | 705.95 MiB/s lz4hc | 4.96 | 190.97 MiB/s | 1727.96 MiB/s 842 | 4.20 | 144.33 MiB/s | 284.99 MiB/s lzo | 4.14 | 910.92 MiB/s | 1272.87 MiB/s lz4 | 3.79 | 868.92 MiB/s | 1500.85 MiB/s - 'partdiff' (8 GiB) Array of double values. Neighbored values are similar, but not equal. This array is produced by an partial differential equation solver using a Jakobi-implementation. algorithm | ratio | compression | decompression zbewalgo | 1.17 | 220.36 MiB/s | 678.05 MiB/s deflate | 1.02 | 36.87 MiB/s | 1191.34 MiB/s lzo | 1.00 | 1727.67 MiB/s | 2103.98 MiB/s lz4 | 1.00 | 1417.10 MiB/s | 2127.88 MiB/s lz4hc | 1.00 | 150.14 MiB/s | 2130.08 MiB/s 842 | 1.00 | 63.99 MiB/s | 2131.95 MiB/s In the category 'compression ratio', the new algorithm zbewalgo is often the best choice. The faster algorithms with lower compression ratio are going to fill the zram-partition faster and finally writing their data to a backing device - which in turn is much slower than imediately using the zbewalgo algorithm. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/zbewalgo.c| 207 + include/linux/zbewalgo.h | 59 ++ lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.h | 116 lib/zbewalgo/JBE.h | 135 ++ lib/zbewalgo/JBE2.h | 139 ++ lib/zbewalgo/MTF.h | 102 ++ lib/zbewalgo/Makefile| 3 + lib/zbewalgo/RLE.h | 134 ++ lib/zbewalgo/bewalgo.h | 378 ++ lib/zbewalgo/bewalgo2.h | 388 +++ lib/zbewalgo/bitshuffle.h| 105 +++ lib/zbewalgo/huffman.h | 232 +++ lib/zbewalgo/include.h | 151 +++ lib/zbewalgo/settings.h | 220 ++ lib/zbewalgo/zbewalgo_compress.c | 372 + 19 files changed, 2754 insertions(+) create mode 100644 crypto/zbewalgo.c create mode 100644 include/linux/zbewalgo.h create mode 100644 lib/zbewalgo/BWT.h create mode 100644 lib/zbewalgo/JBE.h create mode 100644 lib/zbewalgo/JBE2.h cr