[PATCH v7 2/5] crypto: add zBeWalgo to crypto-api

2018-04-13 Thread Benjamin Warnke
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by
zram.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/Kconfig |  12 
 crypto/Makefile|   1 +
 crypto/testmgr.c   |  10 +++
 crypto/testmgr.h   | 134 
 crypto/zbewalgo.c  | 164 +
 drivers/block/zram/zcomp.c |   3 +
 6 files changed, 324 insertions(+)
 create mode 100644 crypto/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 76e8c88c..749457a6 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1686,6 +1686,18 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "zBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the zBeWalgo compression algorithm. This algorithm
+ accepts only input sizes of at most one page at once.
+ To achieve high compression ratios zbewalgo can call multiple
+ transformation and compression algorithms in a row to optimize
+ the compressed size.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index 4fc69fe9..493cbd65 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -124,6 +124,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index af4a01c5..53fd43d1 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3611,6 +3611,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.dec = __VECS(tf_xts_dec_tv_template)
}
}
+   }, {
+   .alg = "zbewalgo",
+   .test = alg_test_comp,
+   .fips_allowed = 1,
+   .suite = {
+   .comp = {
+   .comp = __VECS(zbewalgo_comp_tv_template),
+   .decomp = __VECS(zbewalgo_decomp_tv_template)
+   }
+   }
}, {
.alg = "zlib-deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 004c0a0f..960bfbcf 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -37009,6 +37009,140 @@ static const struct hash_testvec 
bfin_crc_tv_template[] = {
 
 };
 
+static const struct comp_testvec zbewalgo_comp_tv_template[] = {
+   {
+   .inlen  = 512,
+   .outlen = 402,
+   .input  =
+   
"\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d"
+   
"\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d"
+   
"\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d"
+   
"\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d"
+   
"\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d"
+   
"\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d"
+   
"\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d"
+   
"\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d"
+   
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d"
+   
"\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d"
+   
"\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d"
+   
"\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d"
+   
"\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d"
+   
"\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d"
+   
"\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d"
+   
"\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d"
+   
"\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d"
+   
"\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d"
+   
"\x76\x7f\x4b\xaf\x49\x9e\xab\x3d\xeb\x38\x53\xea\xc2\x9b\xab\x3d"
+ 

[PATCH v7 3/5] crypto: add unsafe decompression to api

2018-04-13 Thread Benjamin Warnke
Up to Version 3 of this patch the decompressor of zbewalgo did not verify
that there is no overflow in the output buffer. Now zbewalgo includes a
safe decompressor which does check for buffer overflows and heap-error.
ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version,
which is a bit faster, but does no error checking. These unsafe decompressors
can be applied when the datasource and the whole datapath is trusted.

This patch publishes these existing functions in the crypto-api

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/842.c |  3 ++-
 crypto/compress.c| 10 ++
 crypto/crypto_null.c |  3 ++-
 crypto/deflate.c |  3 ++-
 crypto/lz4.c | 23 ++-
 crypto/lz4hc.c   | 23 ++-
 crypto/lzo.c |  3 ++-
 crypto/testmgr.c | 27 ++-
 crypto/zbewalgo.c| 29 -
 drivers/block/zram/zram_drv.c| 34 +-
 drivers/block/zram/zram_drv.h|  1 +
 drivers/crypto/cavium/zip/zip_main.c |  6 --
 drivers/crypto/nx/nx-842-powernv.c   |  3 ++-
 drivers/crypto/nx/nx-842-pseries.c   |  3 ++-
 include/linux/crypto.h   | 16 
 15 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index bc26dc94..7e74ea26 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -112,7 +112,8 @@ static struct crypto_alg alg = {
.cra_exit   = crypto842_exit,
.cra_u  = { .compress = {
.coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
+   .coa_decompress = crypto842_decompress,
+   .coa_decompress_unsafe  = crypto842_decompress } }
 };
 
 static struct scomp_alg scomp = {
diff --git a/crypto/compress.c b/crypto/compress.c
index f2d52292..bec79624 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm,
   dlen);
 }
 
+static int crypto_decompress_unsafe(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+u8 *dst, unsigned int *dlen)
+{
+   return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src,
+ slen, dst,
+ dlen);
+}
+
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
struct compress_tfm *ops = >crt_compress;
 
ops->cot_compress = crypto_compress;
ops->cot_decompress = crypto_decompress;
+   ops->cot_decompress_unsafe = crypto_decompress_unsafe;
 
return 0;
 }
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 20ff2c74..6e15e8c0 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { {
.cra_module =   THIS_MODULE,
.cra_u  =   { .compress = {
.coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
+   .coa_decompress =   null_compress,
+   .coa_decompress_unsafe  =   null_compress } }
 } };
 
 MODULE_ALIAS_CRYPTO("compress_null");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 94ec3b36..4b681a37 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -286,7 +286,8 @@ static struct crypto_alg alg = {
.cra_exit   = deflate_exit,
.cra_u  = { .compress = {
.coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
+   .coa_decompress = deflate_decompress,
+   .coa_decompress_unsafe  = deflate_decompress } }
 };
 
 static struct scomp_alg scomp[] = { {
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d..60a1914b 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned 
int slen,
return 0;
 }
 
+static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen,
+  void *ctx)
+{
+   int out_len = LZ4_decompress_fast(src, dst, *dlen);
+
+   if (out_len < 0)
+   return -EINVAL;
+
+   *dlen = out_len;
+   return 0;
+}
+
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -117,6 +130,13 @@ static int lz

[PATCH v7 4/5] crypto: configurable compression level

2018-04-13 Thread Benjamin Warnke
Most compression algorithms published by the crypto api are supporting
multiple different compression levels. The crypto api currently just
calls these algorithms with their default compression level.

This patch enables the caller to specify the compression level.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/api.c  | 76 +++
 crypto/deflate.c  | 16 +
 crypto/lz4.c  | 16 +
 crypto/lz4hc.c| 13 +---
 crypto/testmgr.c  |  2 +-
 drivers/block/zram/zcomp.c| 10 +++---
 drivers/block/zram/zcomp.h|  3 +-
 drivers/block/zram/zram_drv.c | 24 --
 drivers/block/zram/zram_drv.h |  1 +
 fs/pstore/platform.c  |  2 +-
 fs/ubifs/compress.c   |  2 +-
 include/linux/crypto.h|  9 +++--
 mm/zswap.c|  2 +-
 net/xfrm/xfrm_ipcomp.c|  3 +-
 14 files changed, 147 insertions(+), 32 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 1d5290c6..81c3d416 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -388,6 +388,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg 
*alg, u32 type,
 }
 EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg,
+  u32 type, u32 mask, int level)
+{
+   struct crypto_tfm *tfm = NULL;
+   unsigned int tfm_size;
+   int err = -ENOMEM;
+
+   tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
+   tfm = kzalloc(tfm_size, GFP_KERNEL);
+   if (!tfm)
+   goto out_err;
+
+   tfm->__crt_alg = alg;
+   if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS)
+   tfm->crt_compress.cot_level = level;
+
+   err = crypto_init_ops(tfm, type, mask);
+   if (err)
+   goto out_free_tfm;
+
+   if (!tfm->exit && alg->cra_init) {
+   err = alg->cra_init(tfm);
+   if (err)
+   goto cra_init_failed;
+   }
+
+   goto out;
+
+cra_init_failed:
+   crypto_exit_ops(tfm);
+out_free_tfm:
+   if (err == -EAGAIN)
+   crypto_shoot_alg(alg);
+   kfree(tfm);
+out_err:
+   tfm = ERR_PTR(err);
+out:
+   return tfm;
+}
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress);
+
 /*
  * crypto_alloc_base - Locate algorithm and allocate transform
  * @alg_name: Name of algorithm
@@ -444,6 +485,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, 
u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
+struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type,
+ u32 mask, int level)
+{
+   struct crypto_tfm *tfm;
+   int err;
+
+   for (;;) {
+   struct crypto_alg *alg;
+
+   alg = crypto_alg_mod_lookup(alg_name, type, mask);
+   if (IS_ERR(alg)) {
+   err = PTR_ERR(alg);
+   goto err;
+   }
+
+   tfm = __crypto_alloc_tfm_compress(alg, type, mask, level);
+   if (!IS_ERR(tfm))
+   return tfm;
+
+   crypto_mod_put(alg);
+   err = PTR_ERR(tfm);
+
+err:
+   if (err != -EAGAIN)
+   break;
+   if (fatal_signal_pending(current)) {
+   err = -EINTR;
+   break;
+   }
+   }
+
+   return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base_compress);
+
 void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
 {
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 4b681a37..54a2ff21 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -24,6 +24,7 @@
  * it is not needed for IPCOMP and keeps the code simpler.  It can be
  * implemented if someone wants it.
  */
+
 #include 
 #include 
 #include 
@@ -43,7 +44,7 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level)
 {
int ret = 0;
struct z_stream_s *stream = >comp_stream;
@@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
}
if (format)
-   ret = zlib_deflateInit(stream, 3);
+   ret = zlib_deflateInit(stream, level);
else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   ret = zlib_deflateInit2(stream, level, Z_DEFLATED,
-DEFLATE_DEF_WINBITS,
DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
@@ -109,11 +110,11 @@ static void

[PATCH v7 1/5] add compression algorithm zBeWalgo

2018-04-13 Thread Benjamin Warnke
zBeWalgo is a completely new algorithm - Currently it is not published
somewhere else right now, googleing it would not show up any results. The
following section describes how the algorithm works.

zBeWalgo itself is a container compression algorithm, which can execute
multiple different compression and transformation algorithms after each other.
The execution of different compression algorithms after each other will be
called 'combination' in this description and in the code. Additionally to be
able to execute combinations of algorithms, zBeWalgo can try different
combinations on the same input. This allows high compression ratios on
completely different datasets, which would otherwise require its own
algorithm each. Executing all known combinations on each input page would be
very slow. Therefore the data is compressed at first with that combination,
which was already successful on the last input page. If the compressed data
size of the current page is similar to that of the last page, the compressed
data is returned immediately without even trying the other combinations. Even
if there is no guarantee that consecutive calls to the algorithm belong to
each other, the speed improvement is obvious.

ZRAM uses zsmalloc for the management of the compressed pages. The largest
size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than
that threshold, ZRAM ignores the compression and writes the uncompressed page
instead. As a consequence it is useless to continue compression, if the
algorithm detects, that the data can not be compressed using the current
combination. The threshold for aborting compression can be changed via sysfs at
any time, even if the algorithm is currently in use. If a combination fails to
compress the data, zBeWalgo tries the next combination. If no combination is
able to reduce the data in size, zBeWalgo returns a negative value.

Each combination consists of up to 7 compression and transformation steps.
Combinations can be added and removed at any time via sysfs. Already compressed
Data can always be decompressed, even if the combination used to produce it
does not exist anymore. Technically the user could add up to 256 combinations
concurrently, but that would be very time consuming if the data can not be
compressed.

To be able to build combinations and call different algorithms, all those
algorithms are implementing the same interface. This enables the user to
specify additional combinations while ZRAM is running.

Within the combinations many different algorithms can be used. Some of those
algorithms are published. This patch adds the following algorithms to be used
within the combinations:
- bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and
'D. J. Wheeler' in 1994. This implementation uses counting sort for
sorting the data. Their original paper is online available at:
http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
- mtf: The Move-To-Front algorithm as described by 'M. Burrows' and
'D. J. Wheeler' in the same paper as bwt.
- jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012.
https://arxiv.org/pdf/1209.1045.pdf
- jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive
Bytes can increase the compression ratio, if for example the first
4 Bits of each Byte are zero. If jbe2 is called after mtf, this
happens ofthen.
- rle: Run Length Encoding
- huffman: Huffman encoding
- bewalgo: I invented this algorithm for my bachelors thesis
'Page-Based compression in the Linux Kernel'. This algorithm is
mainly inspired by lz4, focusing on increasing the speed even more,
with the help of page aligned read an write access. To achieve the
page alignment, the input and output data is accessed only in
blocks of 8 Bytes, therefore the encoding of the compressed data is
changed.

https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
algorithm. The input is read in blocks of 8 Bytes. These Blocks
are added to an avl-tree. The avl-tree is mapped directly to an
array. The encoding is a variation of Run Length Encoding using the
indices in the avl-tree as data. The reason for using the tree
with indices is, that the indices can be encoded in less then
8 Bytes each.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 include/linux/zbewalgo.h  |  50 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 120 
 lib/zbewalgo/BWT.h|  21 ++
 lib/zbewalgo/JBE.c| 204 +
 lib/zbewalgo/JBE.h|  13 +
 lib/zbewalgo/JBE2.c   | 221 ++
 lib/zbewalgo/JBE2.h   |  13 +
 lib/zbewalgo/MTF.c| 122 
 lib/zbewalgo/MTF.h|  13 +
 lib/zbewalgo/Makefile

[PATCH v7 0/5] add compression algorithm zBeWalgo

2018-04-13 Thread Benjamin Warnke
ewalgo'*2.37   337.07   463.32
zbewalgo' 2.37   337.07   468.96
zbewalgo* 2.60   101.17   578.35
zbewalgo  2.60   101.17   586.88


- 'Isabella TCf01'
This dataset is an array of floating point values between -83.00402 and 
31.51576.
Detailed Information about this dataset is online available at
http://www.vets.ucar.edu/vg/isabeldata/readme.html

zBeWalgo is the only algorithm which can compress this dataset with a noticeable
compressionratio.

Algorithmratiowrite read
842   1.0060.09  1956.26
--hdd--   1.00   134.70   156.62
lz4hc_01  1.00   154.81  1839.37
lz4hc*_01 1.00   154.81  2105.53
lz4hc_10  1.00   157.33  2078.69
lz4hc*_10 1.00   157.33  2113.14
lz4hc_09  1.00   158.50  2018.51
lz4hc*_09 1.00   158.50  2093.65
lz4hc*_02 1.00   159.54  2104.91
lz4hc_02  1.00   159.54  2117.34
lz4hc_03  1.00   161.26  2070.76
lz4hc*_03 1.00   161.26  2107.27
lz4hc*_08 1.00   161.34  2100.74
lz4hc_08  1.00   161.34  2105.26
lz4hc*_04 1.00   161.95  2080.96
lz4hc_04  1.00   161.95  2104.00
lz4hc_05  1.00   162.17  2044.43
lz4hc*_05 1.00   162.17  2101.74
lz4hc*_06 1.00   163.61  2087.19
lz4hc_06  1.00   163.61  2104.61
lz4hc_07  1.00   164.51  2094.78
lz4hc*_07 1.00   164.51  2105.53
lz4_011.00  1134.89  2109.70
lz4*_01   1.00  1134.89  2118.71
lz4*_08   1.00  1141.96  2104.87
lz4_081.00  1141.96  2118.97
lz4_091.00  1145.55  2087.76
lz4*_09   1.00  1145.55  2118.85
lz4_021.00  1157.28  2094.33
lz4*_02   1.00  1157.28  2124.67
lz4*_03   1.00  1194.18  2106.36
lz4_031.00  1194.18  2119.89
lz4_041.00  1195.09  2117.03
lz4*_04   1.00  1195.09  2120.23
lz4*_05   1.00  1225.56  2109.04
lz4_051.00  1225.56  2120.52
lz4*_06   1.00  1261.67  2109.14
lz4_061.00  1261.67  2121.13
lz4*_07   1.00  1270.86  1844.63
lz4_071.00  1270.86  2041.08
lz4_101.00  1305.36  2109.22
lz4*_10   1.00  1305.36  2120.65
lzo   1.00  1338.61  2109.66
zstd_17   1.0313.93  1138.94
zstd_18   1.0314.01  1170.78
zstd_16   1.0327.12  1073.75
zstd_15   1.0343.52  1061.97
zstd_14   1.0349.60  1082.98
zstd_12   1.0355.03  1042.43
zstd_13   1.0355.14  1173.50
zstd_11   1.0355.24  1178.05
zstd_10   1.0370.01  1173.05
zstd_07   1.03   118.10  1041.92
zstd_06   1.03   123.00  1171.59
zstd_05   1.03   124.61  1165.74
zstd_01   1.03   166.80  1005.29
zstd_04   1.03   170.25  1127.75
zstd_03   1.03   171.40  1172.34
zstd_02   1.03   174.08  1017.34
zstd_09   1.03   195.30  1176.82
zstd_08   1.03   195.98  1175.09
deflate_9 1.0530.15   483.55
deflate_8 1.0530.45   466.67
deflate_5 1.0531.25   480.92
deflate_4 1.0531.84   472.81
deflate_7 1.0531.84   484.18
deflate_6 1.0531.94   481.37
deflate_2 1.0533.07   484.09
deflate_3 1.0533.11   463.57
deflate_1 1.0533.19   469.71
zstd_22   1.06 8.89   647.75
zstd_21   1.0610.70   700.11
zstd_20   1.0610.80   723.42
zstd_19   1.0612.41   764.24
zbewalgo* 1.51   146.45   581.43
zbewalgo  1.51   146.45   592.86
zbewalgo'*1.5438.14   120.96
zbewalgo' 1.5438.14   125.81


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

Benjamin Warnke (5):
  add compression algorithm zBeWalgo
  crypto: add zBeWalgo to crypto-api
  crypto: add unsafe decompression to api
  crypto: configurable compression level
  crypto: add flag for unstable encoding

 crypto/842.c |   3 +-
 crypto/Kconfig   |  12 +
 crypto/Makefile  |   1 +
 crypto/api.c |  76 
 crypto/compress.c|  10 +
 crypto/crypto_null.c |   3 +-
 crypto/deflate.c |  19 +-
 crypto/lz4.c |  39 +-
 crypto/lz4hc.c   |  36 +-
 crypto/lzo.c |   3 +-
 crypto/testmgr.c |  39 +-
 crypto/testmgr.h | 134 +++
 crypto/zbewalgo.c| 191 ++
 drivers/block/zram/zcomp.c   |  13 +-
 drivers/block/zram/zcomp.h   |   3 +-
 drivers/block/zram/zram_drv.c|  56 ++-
 drivers/block/zram/zram_drv.h|   2 +
 drivers/crypto/cavium/zip/zip_main.c |   6 +-
 drivers/crypto/nx/nx-842-powernv.c   |   3 +-
 drivers/crypto/nx/nx-842-pseries.c   |   3 +-
 fs/ubifs/compress.c  |   2 +-
 include/linux/crypto.h   |  31 +-
 include/linux/zbewalgo.h |  50 +++
 lib/Kconfig  |   3 +
 lib/Makefile |   1 +
 lib/zbewalgo/BWT.c   | 120 ++
 lib/zbewalgo/BWT.h   |  21 ++
 lib/zbewalgo/JBE.c

[PATCH v7 5/5] crypto: add flag for unstable encoding

2018-04-13 Thread Benjamin Warnke
The data-format of zBeWalgo, and some other algorithms is unstable. To
identify such unstable algorithms this patch adds a new flag to the
crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/zbewalgo.c  | 2 +-
 include/linux/crypto.h | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
index 9db0d43b..e57b5ced 100644
--- a/crypto/zbewalgo.c
+++ b/crypto/zbewalgo.c
@@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct 
crypto_tfm *tfm,
 
 static struct crypto_alg crypto_alg_zbewalgo = {
.cra_name = "zbewalgo",
-   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING,
.cra_ctxsize = sizeof(struct zbewalgo_ctx),
.cra_module = THIS_MODULE,
.cra_init = zbewalgo_init,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 6bfb1aea..2228dd08 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,12 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY0x4000
 
+/*
+ * Set if the algorithm is new and it is likely that the encoding may
+ * change in near future
+ */
+#define CRYPTO_ALG_UNSTABLE_ENCODING   0x8000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
2.14.1



[PATCH v6 4/5] crypto: configurable compression level

2018-03-26 Thread Benjamin Warnke
Most compression algorithms published by the crypto api are supporting
multiple different compression levels. The crypto api currently just
calls these algorithms with their default compression level.

This patch enables the caller to specify the compression level.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/api.c  | 76 +++
 crypto/deflate.c  | 16 +
 crypto/lz4.c  | 16 +
 crypto/lz4hc.c| 13 +---
 crypto/testmgr.c  |  2 +-
 drivers/block/zram/zcomp.c| 10 +++---
 drivers/block/zram/zcomp.h|  3 +-
 drivers/block/zram/zram_drv.c | 24 --
 drivers/block/zram/zram_drv.h |  1 +
 fs/ubifs/compress.c   |  2 +-
 include/linux/crypto.h|  9 +++--
 mm/zswap.c|  2 +-
 net/xfrm/xfrm_ipcomp.c|  3 +-
 13 files changed, 146 insertions(+), 31 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 70a894e52..dadd4dede 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg 
*alg, u32 type,
 }
 EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg,
+  u32 type, u32 mask, int level)
+{
+   struct crypto_tfm *tfm = NULL;
+   unsigned int tfm_size;
+   int err = -ENOMEM;
+
+   tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
+   tfm = kzalloc(tfm_size, GFP_KERNEL);
+   if (!tfm)
+   goto out_err;
+
+   tfm->__crt_alg = alg;
+   if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS)
+   tfm->crt_compress.cot_level = level;
+
+   err = crypto_init_ops(tfm, type, mask);
+   if (err)
+   goto out_free_tfm;
+
+   if (!tfm->exit && alg->cra_init) {
+   err = alg->cra_init(tfm);
+   if (err)
+   goto cra_init_failed;
+   }
+
+   goto out;
+
+cra_init_failed:
+   crypto_exit_ops(tfm);
+out_free_tfm:
+   if (err == -EAGAIN)
+   crypto_shoot_alg(alg);
+   kfree(tfm);
+out_err:
+   tfm = ERR_PTR(err);
+out:
+   return tfm;
+}
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress);
+
 /*
  * crypto_alloc_base - Locate algorithm and allocate transform
  * @alg_name: Name of algorithm
@@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, 
u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
+struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type,
+ u32 mask, int level)
+{
+   struct crypto_tfm *tfm;
+   int err;
+
+   for (;;) {
+   struct crypto_alg *alg;
+
+   alg = crypto_alg_mod_lookup(alg_name, type, mask);
+   if (IS_ERR(alg)) {
+   err = PTR_ERR(alg);
+   goto err;
+   }
+
+   tfm = __crypto_alloc_tfm_compress(alg, type, mask, level);
+   if (!IS_ERR(tfm))
+   return tfm;
+
+   crypto_mod_put(alg);
+   err = PTR_ERR(tfm);
+
+err:
+   if (err != -EAGAIN)
+   break;
+   if (fatal_signal_pending(current)) {
+   err = -EINTR;
+   break;
+   }
+   }
+
+   return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base_compress);
+
 void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
 {
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 4b681a37c..54a2ff21b 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -24,6 +24,7 @@
  * it is not needed for IPCOMP and keeps the code simpler.  It can be
  * implemented if someone wants it.
  */
+
 #include 
 #include 
 #include 
@@ -43,7 +44,7 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level)
 {
int ret = 0;
struct z_stream_s *stream = >comp_stream;
@@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
}
if (format)
-   ret = zlib_deflateInit(stream, 3);
+   ret = zlib_deflateInit(stream, level);
else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   ret = zlib_deflateInit2(stream, level, Z_DEFLATED,
-DEFLATE_DEF_WINBITS,
DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
@@ -109,11 +110,11 @@ static void deflate_decomp_exit(str

[PATCH v6 2/5] crypto: add zBeWalgo to crypto-api

2018-03-26 Thread Benjamin Warnke
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by
zram.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/Kconfig|  12 
 crypto/Makefile   |   1 +
 crypto/testmgr.c  |  10 +++
 crypto/testmgr.h  | 134 ++
 crypto/zbewalgo.c | 164 ++
 drivers/block/zram/zcomp.c|   3 +
 drivers/block/zram/zram_drv.h |   4 +-
 7 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 crypto/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b75264b09..3ac0d4ca7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1668,6 +1668,18 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "zBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the zBeWalgo compression algorithm. This algorithm
+ accepts only input sizes of at most one page at once.
+ To achieve high compression ratios zbewalgo can call multiple
+ transformation and compression algorithms in a row to optimize
+ the compressed size.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d5e23a142..294075476 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.dec = __VECS(tf_xts_dec_tv_template)
}
}
+   }, {
+   .alg = "zbewalgo",
+   .test = alg_test_comp,
+   .fips_allowed = 1,
+   .suite = {
+   .comp = {
+   .comp = __VECS(zbewalgo_comp_tv_template),
+   .decomp = __VECS(zbewalgo_decomp_tv_template)
+   }
+   }
}, {
.alg = "zlib-deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6044f6906..996d8321e 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -35133,6 +35133,140 @@ static const struct hash_testvec 
bfin_crc_tv_template[] = {
 
 };
 
+static const struct comp_testvec zbewalgo_comp_tv_template[] = {
+   {
+   .inlen  = 512,
+   .outlen = 402,
+   .input  =
+   
"\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d"
+   
"\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d"
+   
"\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d"
+   
"\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d"
+   
"\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d"
+   
"\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d"
+   
"\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d"
+   
"\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d"
+   
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d"
+   
"\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d"
+   
"\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d"
+   
"\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d"
+   
"\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d"
+   
"\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d"
+   
"\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d"
+   
"\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d"
+   
"\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d"
+   
"\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d"
+   
"\x76\

[PATCH v6 3/5] crypto: add unsafe decompression to api

2018-03-26 Thread Benjamin Warnke
Up to Version 3 of this patch the decompressor of zbewalgo did not verify
that there is no overflow in the output buffer. Now zbewalgo includes a
safe decompressor which does check for buffer overflows and heap-error.
ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version,
which is a bit faster, but does no error checking. These unsafe decompressors
can be applied when the datasource and the whole datapath is trusted.

This patch publishes these existing functions in the crypto-api

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/842.c |  3 ++-
 crypto/compress.c| 10 ++
 crypto/crypto_null.c |  3 ++-
 crypto/deflate.c |  3 ++-
 crypto/lz4.c | 23 ++-
 crypto/lz4hc.c   | 23 ++-
 crypto/lzo.c |  3 ++-
 crypto/testmgr.c | 27 ++-
 crypto/zbewalgo.c| 29 -
 drivers/block/zram/zram_drv.c| 34 +-
 drivers/block/zram/zram_drv.h|  1 +
 drivers/crypto/cavium/zip/zip_main.c |  6 --
 drivers/crypto/nx/nx-842-powernv.c   |  3 ++-
 drivers/crypto/nx/nx-842-pseries.c   |  3 ++-
 include/linux/crypto.h   | 16 
 15 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index bc26dc942..7e74ea26b 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -112,7 +112,8 @@ static struct crypto_alg alg = {
.cra_exit   = crypto842_exit,
.cra_u  = { .compress = {
.coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
+   .coa_decompress = crypto842_decompress,
+   .coa_decompress_unsafe  = crypto842_decompress } }
 };
 
 static struct scomp_alg scomp = {
diff --git a/crypto/compress.c b/crypto/compress.c
index f2d522924..bec796249 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm,
   dlen);
 }
 
+static int crypto_decompress_unsafe(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+u8 *dst, unsigned int *dlen)
+{
+   return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src,
+ slen, dst,
+ dlen);
+}
+
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
struct compress_tfm *ops = >crt_compress;
 
ops->cot_compress = crypto_compress;
ops->cot_decompress = crypto_decompress;
+   ops->cot_decompress_unsafe = crypto_decompress_unsafe;
 
return 0;
 }
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 20ff2c746..6e15e8c0b 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { {
.cra_module =   THIS_MODULE,
.cra_u  =   { .compress = {
.coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
+   .coa_decompress =   null_compress,
+   .coa_decompress_unsafe  =   null_compress } }
 } };
 
 MODULE_ALIAS_CRYPTO("compress_null");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 94ec3b36a..4b681a37c 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -286,7 +286,8 @@ static struct crypto_alg alg = {
.cra_exit   = deflate_exit,
.cra_u  = { .compress = {
.coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
+   .coa_decompress = deflate_decompress,
+   .coa_decompress_unsafe  = deflate_decompress } }
 };
 
 static struct scomp_alg scomp[] = { {
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3..60a1914b7 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned 
int slen,
return 0;
 }
 
+static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen,
+  void *ctx)
+{
+   int out_len = LZ4_decompress_fast(src, dst, *dlen);
+
+   if (out_len < 0)
+   return -EINVAL;
+
+   *dlen = out_len;
+   return 0;
+}
+
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -117,6 +130,13 @@ st

[PATCH v6 0/5] add compression algorithm zBeWalgo

2018-03-26 Thread Benjamin Warnke
ewalgo* 2.60   101.17   578.35
zbewalgo  2.60   101.17   586.88


- 'Isabella TCf01'
This dataset is an array of floating point values between -83.00402 and 
31.51576.
Detailed Information about this dataset is online available at
http://www.vets.ucar.edu/vg/isabeldata/readme.html

zBeWalgo is the only algorithm which can compress this dataset with a noticeable
compressionratio.

Algorithmratiowrite read
842   1.0060.09  1956.26
--hdd--   1.00   134.70   156.62
lz4hc_01  1.00   154.81  1839.37
lz4hc*_01 1.00   154.81  2105.53
lz4hc_10  1.00   157.33  2078.69
lz4hc*_10 1.00   157.33  2113.14
lz4hc_09  1.00   158.50  2018.51
lz4hc*_09 1.00   158.50  2093.65
lz4hc*_02 1.00   159.54  2104.91
lz4hc_02  1.00   159.54  2117.34
lz4hc_03  1.00   161.26  2070.76
lz4hc*_03 1.00   161.26  2107.27
lz4hc*_08 1.00   161.34  2100.74
lz4hc_08  1.00   161.34  2105.26
lz4hc*_04 1.00   161.95  2080.96
lz4hc_04  1.00   161.95  2104.00
lz4hc_05  1.00   162.17  2044.43
lz4hc*_05 1.00   162.17  2101.74
lz4hc*_06 1.00   163.61  2087.19
lz4hc_06  1.00   163.61  2104.61
lz4hc_07  1.00   164.51  2094.78
lz4hc*_07 1.00   164.51  2105.53
lz4_011.00  1134.89  2109.70
lz4*_01   1.00  1134.89  2118.71
lz4*_08   1.00  1141.96  2104.87
lz4_081.00  1141.96  2118.97
lz4_091.00  1145.55  2087.76
lz4*_09   1.00  1145.55  2118.85
lz4_021.00  1157.28  2094.33
lz4*_02   1.00  1157.28  2124.67
lz4*_03   1.00  1194.18  2106.36
lz4_031.00  1194.18  2119.89
lz4_041.00  1195.09  2117.03
lz4*_04   1.00  1195.09  2120.23
lz4*_05   1.00  1225.56  2109.04
lz4_051.00  1225.56  2120.52
lz4*_06   1.00  1261.67  2109.14
lz4_061.00  1261.67  2121.13
lz4*_07   1.00  1270.86  1844.63
lz4_071.00  1270.86  2041.08
lz4_101.00  1305.36  2109.22
lz4*_10   1.00  1305.36  2120.65
lzo   1.00  1338.61  2109.66
zstd_17   1.0313.93  1138.94
zstd_18   1.0314.01  1170.78
zstd_16   1.0327.12  1073.75
zstd_15   1.0343.52  1061.97
zstd_14   1.0349.60  1082.98
zstd_12   1.0355.03  1042.43
zstd_13   1.0355.14  1173.50
zstd_11   1.0355.24  1178.05
zstd_10   1.0370.01  1173.05
zstd_07   1.03   118.10  1041.92
zstd_06   1.03   123.00  1171.59
zstd_05   1.03   124.61  1165.74
zstd_01   1.03   166.80  1005.29
zstd_04   1.03   170.25  1127.75
zstd_03   1.03   171.40  1172.34
zstd_02   1.03   174.08  1017.34
zstd_09   1.03   195.30  1176.82
zstd_08   1.03   195.98  1175.09
deflate_9 1.0530.15   483.55
deflate_8 1.0530.45   466.67
deflate_5 1.0531.25   480.92
deflate_4 1.0531.84   472.81
deflate_7 1.0531.84   484.18
deflate_6 1.0531.94   481.37
deflate_2 1.0533.07   484.09
deflate_3 1.0533.11   463.57
deflate_1 1.0533.19   469.71
zstd_22   1.06 8.89   647.75
zstd_21   1.0610.70   700.11
zstd_20   1.0610.80   723.42
zstd_19   1.0612.41   764.24
zbewalgo* 1.51   146.45   581.43
zbewalgo  1.51   146.45   592.86
zbewalgo'*1.5438.14   120.96
zbewalgo' 1.5438.14   125.81


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

Benjamin Warnke (5):
  add compression algorithm zBeWalgo
  crypto: add zBeWalgo to crypto-api
  crypto: add unsafe decompression to api
  crypto: configurable compression level
  crypto: add flag for unstable encoding

 crypto/842.c |   3 +-
 crypto/Kconfig   |  12 +
 crypto/Makefile  |   1 +
 crypto/api.c |  76 
 crypto/compress.c|  10 +
 crypto/crypto_null.c |   3 +-
 crypto/deflate.c |  19 +-
 crypto/lz4.c |  39 +-
 crypto/lz4hc.c   |  36 +-
 crypto/lzo.c |   3 +-
 crypto/testmgr.c |  39 +-
 crypto/testmgr.h | 134 +++
 crypto/zbewalgo.c| 191 ++
 drivers/block/zram/zcomp.c   |  13 +-
 drivers/block/zram/zcomp.h   |   3 +-
 drivers/block/zram/zram_drv.c|  56 ++-
 drivers/block/zram/zram_drv.h|   6 +-
 drivers/crypto/cavium/zip/zip_main.c |   6 +-
 drivers/crypto/nx/nx-842-powernv.c   |   3 +-
 drivers/crypto/nx/nx-842-pseries.c   |   3 +-
 fs/ubifs/compress.c  |   2 +-
 include/linux/crypto.h   |  31 +-
 include/linux/zbewalgo.h |  50 +++
 lib/Kconfig  |   3 +
 lib/Makefile |   1 +
 lib/zbewalgo/BWT.c   | 120 ++
 lib/zbewalgo/BWT.h   |  21 ++
 lib/zbewalgo/JBE.c   | 204 ++
 lib/zbewalgo/JBE.h   |  13 +
 lib/zbe

[PATCH v6 1/5] add compression algorithm zBeWalgo

2018-03-26 Thread Benjamin Warnke
zBeWalgo is a completely new algorithm - Currently it is not published
somewhere else right now, googleing it would not show up any results. The
following section describes how the algorithm works.

zBeWalgo itself is a container compression algorithm, which can execute
multiple different compression and transformation algorithms after each other.
The execution of different compression algorithms after each other will be
called 'combination' in this description and in the code. Additionally to be
able to execute combinations of algorithms, zBeWalgo can try different
combinations on the same input. This allows high compression ratios on
completely different datasets, which would otherwise require its own
algorithm each. Executing all known combinations on each input page would be
very slow. Therefore the data is compressed at first with that combination,
which was already successful on the last input page. If the compressed data
size of the current page is similar to that of the last page, the compressed
data is returned immediately without even trying the other combinations. Even
if there is no guarantee that consecutive calls to the algorithm belong to
each other, the speed improvement is obvious.

ZRAM uses zsmalloc for the management of the compressed pages. The largest
size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than
that threshold, ZRAM ignores the compression and writes the uncompressed page
instead. As a consequence it is useless to continue compression, if the
algorithm detects, that the data can not be compressed using the current
combination. The threshold for aborting compression can be changed via sysfs at
any time, even if the algorithm is currently in use. If a combination fails to
compress the data, zBeWalgo tries the next combination. If no combination is
able to reduce the data in size, zBeWalgo returns a negative value.

Each combination consists of up to 7 compression and transformation steps.
Combinations can be added and removed at any time via sysfs. Already compressed
Data can always be decompressed, even if the combination used to produce it
does not exist anymore. Technically the user could add up to 256 combinations
concurrently, but that would be very time consuming if the data can not be
compressed.

To be able to build combinations and call different algorithms, all those
algorithms are implementing the same interface. This enables the user to
specify additional combinations while ZRAM is running.

Within the combinations many different algorithms can be used. Some of those
algorithms are published. This patch adds the following algorithms to be used
within the combinations:
- bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and
'D. J. Wheeler' in 1994. This implementation uses counting sort for
sorting the data. Their original paper is online available at:
http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
- mtf: The Move-To-Front algorithm as described by 'M. Burrows' and
'D. J. Wheeler' in the same paper as bwt.
- jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012.
https://arxiv.org/pdf/1209.1045.pdf
- jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive
Bytes can increase the compression ratio, if for example the first
4 Bits of each Byte are zero. If jbe2 is called after mtf, this
happens ofthen.
- rle: Run Length Encoding
- huffman: Huffman encoding
- bewalgo: I invented this algorithm for my bachelors thesis
'Page-Based compression in the Linux Kernel'. This algorithm is
mainly inspired by lz4, focusing on increasing the speed even more,
with the help of page aligned read an write access. To achieve the
page alignment, the input and output data is accessed only in
blocks of 8 Bytes, therefore the encoding of the compressed data is
changed.

https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
algorithm. The input is read in blocks of 8 Bytes. These Blocks
are added to an avl-tree. The avl-tree is mapped directly to an
array. The encoding is a variation of Run Length Encoding using the
indices in the avl-tree as data. The reason for using the tree
with indices is, that the indices can be encoded in less then
8 Bytes each.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 include/linux/zbewalgo.h  |  50 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 120 
 lib/zbewalgo/BWT.h|  21 ++
 lib/zbewalgo/JBE.c| 204 +
 lib/zbewalgo/JBE.h|  13 +
 lib/zbewalgo/JBE2.c   | 221 ++
 lib/zbewalgo/JBE2.h   |  13 +
 lib/zbewalgo/MTF.c| 122 
 lib/zbewalgo/MTF.h|  13 +
 lib/zbewalgo/Makefile

[PATCH v6 5/5] crypto: add flag for unstable encoding

2018-03-26 Thread Benjamin Warnke
The data-format of zBeWalgo, and some other algorithms is unstable. To
identify such unstable algorithms this patch adds a new flag to the
crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/zbewalgo.c  | 2 +-
 include/linux/crypto.h | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
index 9db0d43be..e57b5ced5 100644
--- a/crypto/zbewalgo.c
+++ b/crypto/zbewalgo.c
@@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct 
crypto_tfm *tfm,
 
 static struct crypto_alg crypto_alg_zbewalgo = {
.cra_name = "zbewalgo",
-   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING,
.cra_ctxsize = sizeof(struct zbewalgo_ctx),
.cra_module = THIS_MODULE,
.cra_init = zbewalgo_init,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 63420dac0..372893569 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,12 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY0x4000
 
+/*
+ * Set if the algorithm is new and it is likely that the encoding may
+ * change in near future
+ */
+#define CRYPTO_ALG_UNSTABLE_ENCODING   0x8000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
2.14.1



[RFC PATCH v5 2/5] crypto: add zBeWalgo to crypto-api

2018-03-23 Thread Benjamin Warnke
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by
zram.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/Kconfig|  12 
 crypto/Makefile   |   1 +
 crypto/testmgr.c  |  10 +++
 crypto/testmgr.h  | 134 ++
 crypto/zbewalgo.c | 164 ++
 drivers/block/zram/zcomp.c|   3 +
 drivers/block/zram/zram_drv.h |   4 +-
 7 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 crypto/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b75264b09..3ac0d4ca7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1668,6 +1668,18 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "zBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the zBeWalgo compression algorithm. This algorithm
+ accepts only input sizes of at most one page at once.
+ To achieve high compression ratios zbewalgo can call multiple
+ transformation and compression algorithms in a row to optimize
+ the compressed size.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d5e23a142..294075476 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.dec = __VECS(tf_xts_dec_tv_template)
}
}
+   }, {
+   .alg = "zbewalgo",
+   .test = alg_test_comp,
+   .fips_allowed = 1,
+   .suite = {
+   .comp = {
+   .comp = __VECS(zbewalgo_comp_tv_template),
+   .decomp = __VECS(zbewalgo_decomp_tv_template)
+   }
+   }
}, {
.alg = "zlib-deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6044f6906..996d8321e 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -35133,6 +35133,140 @@ static const struct hash_testvec 
bfin_crc_tv_template[] = {
 
 };
 
+static const struct comp_testvec zbewalgo_comp_tv_template[] = {
+   {
+   .inlen  = 512,
+   .outlen = 402,
+   .input  =
+   
"\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d"
+   
"\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d"
+   
"\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d"
+   
"\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d"
+   
"\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d"
+   
"\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d"
+   
"\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d"
+   
"\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d"
+   
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d"
+   
"\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d"
+   
"\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d"
+   
"\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d"
+   
"\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d"
+   
"\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d"
+   
"\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d"
+   
"\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d"
+   
"\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d"
+   
"\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d"
+   
"\x76\

[RFC PATCH v5 0/5] add compression algorithm zBeWalgo

2018-03-23 Thread Benjamin Warnke
 TCf01'
This dataset is an array of floating point values between -83.00402 and 
31.51576.
Detailed Information about this dataset is online available at
http://www.vets.ucar.edu/vg/isabeldata/readme.html

zBeWalgo is the only algorithm which can compress this dataset with a noticeable
compressionratio.

Algorithmratiowrite read
842   1.0060.09  1956.26
--hdd--   1.00   134.70   156.62
lz4hc_01  1.00   154.81  1839.37
lz4hc*_01 1.00   154.81  2105.53
lz4hc_10  1.00   157.33  2078.69
lz4hc*_10 1.00   157.33  2113.14
lz4hc_09  1.00   158.50  2018.51
lz4hc*_09 1.00   158.50  2093.65
lz4hc*_02 1.00   159.54  2104.91
lz4hc_02  1.00   159.54  2117.34
lz4hc_03  1.00   161.26  2070.76
lz4hc*_03 1.00   161.26  2107.27
lz4hc*_08 1.00   161.34  2100.74
lz4hc_08  1.00   161.34  2105.26
lz4hc*_04 1.00   161.95  2080.96
lz4hc_04  1.00   161.95  2104.00
lz4hc_05  1.00   162.17  2044.43
lz4hc*_05 1.00   162.17  2101.74
lz4hc*_06 1.00   163.61  2087.19
lz4hc_06  1.00   163.61  2104.61
lz4hc_07  1.00   164.51  2094.78
lz4hc*_07 1.00   164.51  2105.53
lz4_011.00  1134.89  2109.70
lz4*_01   1.00  1134.89  2118.71
lz4*_08   1.00  1141.96  2104.87
lz4_081.00  1141.96  2118.97
lz4_091.00  1145.55  2087.76
lz4*_09   1.00  1145.55  2118.85
lz4_021.00  1157.28  2094.33
lz4*_02   1.00  1157.28  2124.67
lz4*_03   1.00  1194.18  2106.36
lz4_031.00  1194.18  2119.89
lz4_041.00  1195.09  2117.03
lz4*_04   1.00  1195.09  2120.23
lz4*_05   1.00  1225.56  2109.04
lz4_051.00  1225.56  2120.52
lz4*_06   1.00  1261.67  2109.14
lz4_061.00  1261.67  2121.13
lz4*_07   1.00  1270.86  1844.63
lz4_071.00  1270.86  2041.08
lz4_101.00  1305.36  2109.22
lz4*_10   1.00  1305.36  2120.65
lzo   1.00  1338.61  2109.66
zstd_17   1.0313.93  1138.94
zstd_18   1.0314.01  1170.78
zstd_16   1.0327.12  1073.75
zstd_15   1.0343.52  1061.97
zstd_14   1.0349.60  1082.98
zstd_12   1.0355.03  1042.43
zstd_13   1.0355.14  1173.50
zstd_11   1.0355.24  1178.05
zstd_10   1.0370.01  1173.05
zstd_07   1.03   118.10  1041.92
zstd_06   1.03   123.00  1171.59
zstd_05   1.03   124.61  1165.74
zstd_01   1.03   166.80  1005.29
zstd_04   1.03   170.25  1127.75
zstd_03   1.03   171.40  1172.34
zstd_02   1.03   174.08  1017.34
zstd_09   1.03   195.30  1176.82
zstd_08   1.03   195.98  1175.09
deflate_9 1.0530.15   483.55
deflate_8 1.0530.45   466.67
deflate_5 1.0531.25   480.92
deflate_4 1.0531.84   472.81
deflate_7 1.0531.84   484.18
deflate_6 1.0531.94   481.37
deflate_2 1.0533.07   484.09
deflate_3 1.0533.11   463.57
deflate_1 1.0533.19   469.71
zstd_22   1.06 8.89   647.75
zstd_21   1.0610.70   700.11
zstd_20   1.0610.80   723.42
zstd_19   1.0612.41   764.24
zbewalgo* 1.51   146.45   581.43
zbewalgo  1.51   146.45   592.86
zbewalgo'*1.5438.14   120.96
zbewalgo' 1.5438.14   125.81


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

---

-- 
2.14.1



[RFC PATCH v5 4/5] crypto: configurable compression level

2018-03-23 Thread Benjamin Warnke
Most compression algorithms published by the crypto api are supporting
multiple different compression levels. The crypto api currently just
calls these algorithms with their default compression level.

This patch enables the caller to specify the compression level.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/api.c  | 76 +++
 crypto/deflate.c  | 16 +
 crypto/lz4.c  | 16 +
 crypto/lz4hc.c| 13 +---
 crypto/testmgr.c  |  2 +-
 drivers/block/zram/zcomp.c| 10 +++---
 drivers/block/zram/zcomp.h|  3 +-
 drivers/block/zram/zram_drv.c | 24 --
 drivers/block/zram/zram_drv.h |  1 +
 fs/ubifs/compress.c   |  2 +-
 include/linux/crypto.h|  9 +++--
 mm/zswap.c|  2 +-
 net/xfrm/xfrm_ipcomp.c|  3 +-
 13 files changed, 146 insertions(+), 31 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 70a894e52..dadd4dede 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg 
*alg, u32 type,
 }
 EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg,
+  u32 type, u32 mask, int level)
+{
+   struct crypto_tfm *tfm = NULL;
+   unsigned int tfm_size;
+   int err = -ENOMEM;
+
+   tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
+   tfm = kzalloc(tfm_size, GFP_KERNEL);
+   if (!tfm)
+   goto out_err;
+
+   tfm->__crt_alg = alg;
+   if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS)
+   tfm->crt_compress.cot_level = level;
+
+   err = crypto_init_ops(tfm, type, mask);
+   if (err)
+   goto out_free_tfm;
+
+   if (!tfm->exit && alg->cra_init) {
+   err = alg->cra_init(tfm);
+   if (err)
+   goto cra_init_failed;
+   }
+
+   goto out;
+
+cra_init_failed:
+   crypto_exit_ops(tfm);
+out_free_tfm:
+   if (err == -EAGAIN)
+   crypto_shoot_alg(alg);
+   kfree(tfm);
+out_err:
+   tfm = ERR_PTR(err);
+out:
+   return tfm;
+}
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress);
+
 /*
  * crypto_alloc_base - Locate algorithm and allocate transform
  * @alg_name: Name of algorithm
@@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, 
u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
+struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type,
+ u32 mask, int level)
+{
+   struct crypto_tfm *tfm;
+   int err;
+
+   for (;;) {
+   struct crypto_alg *alg;
+
+   alg = crypto_alg_mod_lookup(alg_name, type, mask);
+   if (IS_ERR(alg)) {
+   err = PTR_ERR(alg);
+   goto err;
+   }
+
+   tfm = __crypto_alloc_tfm_compress(alg, type, mask, level);
+   if (!IS_ERR(tfm))
+   return tfm;
+
+   crypto_mod_put(alg);
+   err = PTR_ERR(tfm);
+
+err:
+   if (err != -EAGAIN)
+   break;
+   if (fatal_signal_pending(current)) {
+   err = -EINTR;
+   break;
+   }
+   }
+
+   return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base_compress);
+
 void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
 {
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 4b681a37c..54a2ff21b 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -24,6 +24,7 @@
  * it is not needed for IPCOMP and keeps the code simpler.  It can be
  * implemented if someone wants it.
  */
+
 #include 
 #include 
 #include 
@@ -43,7 +44,7 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level)
 {
int ret = 0;
struct z_stream_s *stream = >comp_stream;
@@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
}
if (format)
-   ret = zlib_deflateInit(stream, 3);
+   ret = zlib_deflateInit(stream, level);
else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   ret = zlib_deflateInit2(stream, level, Z_DEFLATED,
-DEFLATE_DEF_WINBITS,
DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
@@ -109,11 +110,11 @@ static void deflate_decomp_exit(str

[RFC PATCH v5 3/5] crypto: add unsafe decompression to api

2018-03-23 Thread Benjamin Warnke
Up to Version 3 of this patch the decompressor of zbewalgo did not verify
that there is no overflow in the output buffer. Now zbewalgo includes a
safe decompressor which does check for buffer overflows and heap-error.
ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version,
which is a bit faster, but does no error checking. These unsafe decompressors
can be applied when the datasource and the whole datapath is trusted.

This patch publishes these existing functions in the crypto-api

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/842.c |  3 ++-
 crypto/compress.c| 10 ++
 crypto/crypto_null.c |  3 ++-
 crypto/deflate.c |  3 ++-
 crypto/lz4.c | 23 ++-
 crypto/lz4hc.c   | 23 ++-
 crypto/lzo.c |  3 ++-
 crypto/testmgr.c | 27 ++-
 crypto/zbewalgo.c| 29 -
 drivers/block/zram/zram_drv.c| 34 +-
 drivers/block/zram/zram_drv.h|  1 +
 drivers/crypto/cavium/zip/zip_main.c |  6 --
 drivers/crypto/nx/nx-842-powernv.c   |  3 ++-
 drivers/crypto/nx/nx-842-pseries.c   |  3 ++-
 include/linux/crypto.h   | 16 
 15 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index bc26dc942..7e74ea26b 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -112,7 +112,8 @@ static struct crypto_alg alg = {
.cra_exit   = crypto842_exit,
.cra_u  = { .compress = {
.coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
+   .coa_decompress = crypto842_decompress,
+   .coa_decompress_unsafe  = crypto842_decompress } }
 };
 
 static struct scomp_alg scomp = {
diff --git a/crypto/compress.c b/crypto/compress.c
index f2d522924..bec796249 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm,
   dlen);
 }
 
+static int crypto_decompress_unsafe(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+u8 *dst, unsigned int *dlen)
+{
+   return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src,
+ slen, dst,
+ dlen);
+}
+
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
struct compress_tfm *ops = >crt_compress;
 
ops->cot_compress = crypto_compress;
ops->cot_decompress = crypto_decompress;
+   ops->cot_decompress_unsafe = crypto_decompress_unsafe;
 
return 0;
 }
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 20ff2c746..6e15e8c0b 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { {
.cra_module =   THIS_MODULE,
.cra_u  =   { .compress = {
.coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
+   .coa_decompress =   null_compress,
+   .coa_decompress_unsafe  =   null_compress } }
 } };
 
 MODULE_ALIAS_CRYPTO("compress_null");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 94ec3b36a..4b681a37c 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -286,7 +286,8 @@ static struct crypto_alg alg = {
.cra_exit   = deflate_exit,
.cra_u  = { .compress = {
.coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
+   .coa_decompress = deflate_decompress,
+   .coa_decompress_unsafe  = deflate_decompress } }
 };
 
 static struct scomp_alg scomp[] = { {
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3..60a1914b7 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned 
int slen,
return 0;
 }
 
+static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen,
+  void *ctx)
+{
+   int out_len = LZ4_decompress_fast(src, dst, *dlen);
+
+   if (out_len < 0)
+   return -EINVAL;
+
+   *dlen = out_len;
+   return 0;
+}
+
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -117,6 +130,13 @@ st

[RFC PATCH v5 1/5] add compression algorithm zBeWalgo

2018-03-23 Thread Benjamin Warnke
zBeWalgo is a completely new algorithm - Currently it is not published
somewhere else right now, googleing it would not show up any results. The
following section describes how the algorithm works.

zBeWalgo itself is a container compression algorithm, which can execute
multiple different compression and transformation algorithms after each other.
The execution of different compression algorithms after each other will be
called 'combination' in this description and in the code. Additionally to be
able to execute combinations of algorithms, zBeWalgo can try different
combinations on the same input. This allows high compression ratios on
completely different datasets, which would otherwise require its own
algorithm each. Executing all known combinations on each input page would be
very slow. Therefore the data is compressed at first with that combination,
which was already successful on the last input page. If the compressed data
size of the current page is similar to that of the last page, the compressed
data is returned immediately without even trying the other combinations. Even
if there is no guarantee that consecutive calls to the algorithm belong to
each other, the speed improvement is obvious.

ZRAM uses zsmalloc for the management of the compressed pages. The largest
size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than
that threshold, ZRAM ignores the compression and writes the uncompressed page
instead. As a consequence it is useless to continue compression, if the
algorithm detects, that the data can not be compressed using the current
combination. The threshold for aborting compression can be changed via sysfs at
any time, even if the algorithm is currently in use. If a combination fails to
compress the data, zBeWalgo tries the next combination. If no combination is
able to reduce the data in size, zBeWalgo returns a negative value.

Each combination consists of up to 7 compression and transformation steps.
Combinations can be added and removed at any time via sysfs. Already compressed
Data can always be decompressed, even if the combination used to produce it
does not exist anymore. Technically the user could add up to 256 combinations
concurrently, but that would be very time consuming if the data can not be
compressed.

To be able to build combinations and call different algorithms, all those
algorithms are implementing the same interface. This enables the user to
specify additional combinations while ZRAM is running.

Within the combinations many different algorithms can be used. Some of those
algorithms are published. This patch adds the following algorithms to be used
within the combinations:
- bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and
'D. J. Wheeler' in 1994. This implementation uses counting sort for
sorting the data. Their original paper is online available at:
http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
- mtf: The Move-To-Front algorithm as described by 'M. Burrows' and
'D. J. Wheeler' in the same paper as bwt.
- jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012.
https://arxiv.org/pdf/1209.1045.pdf
- jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive
Bytes can increase the compression ratio, if for example the first
4 Bits of each Byte are zero. If jbe2 is called after mtf, this
happens ofthen.
- rle: Run Length Encoding
- huffman: Huffman encoding
- bewalgo: I invented this algorithm for my bachelors thesis
'Page-Based compression in the Linux Kernel'. This algorithm is
mainly inspired by lz4, focusing on increasing the speed even more,
with the help of page aligned read an write access. To achieve the
page alignment, the input and output data is accessed only in
blocks of 8 Bytes, therefore the encoding of the compressed data is
changed.

https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
algorithm. The input is read in blocks of 8 Bytes. These Blocks
are added to an avl-tree. The avl-tree is mapped directly to an
array. The encoding is a variation of Run Length Encoding using the
indices in the avl-tree as data. The reason for using the tree
with indices is, that the indices can be encoded in less then
8 Bytes each.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 include/linux/zbewalgo.h  |  50 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 118 
 lib/zbewalgo/BWT.h|  21 ++
 lib/zbewalgo/JBE.c| 204 +
 lib/zbewalgo/JBE.h|  13 +
 lib/zbewalgo/JBE2.c   | 221 ++
 lib/zbewalgo/JBE2.h   |  13 +
 lib/zbewalgo/MTF.c| 122 
 lib/zbewalgo/MTF.h|  13 +
 lib/zbewalgo/Makefile

[RFC PATCH v5 5/5] crypto: add flag for unstable encoding

2018-03-23 Thread Benjamin Warnke
The data-format of zBeWalgo, and some other algorithms is unstable. To
identify such unstable algorithms this patch adds a new flag to the
crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/zbewalgo.c  | 2 +-
 include/linux/crypto.h | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
index 9db0d43be..e57b5ced5 100644
--- a/crypto/zbewalgo.c
+++ b/crypto/zbewalgo.c
@@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct 
crypto_tfm *tfm,
 
 static struct crypto_alg crypto_alg_zbewalgo = {
.cra_name = "zbewalgo",
-   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING,
.cra_ctxsize = sizeof(struct zbewalgo_ctx),
.cra_module = THIS_MODULE,
.cra_init = zbewalgo_init,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 63420dac0..372893569 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,12 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY0x4000
 
+/*
+ * Set if the algorithm is new and it is likely that the encoding may
+ * change in near future
+ */
+#define CRYPTO_ALG_UNSTABLE_ENCODING   0x8000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
2.14.1



Re: [PATCH 1/5 v4] add compression algorithm zBeWalgo

2018-03-22 Thread Benjamin Warnke
Hi Philippe,

> Actually to be consistent if you want to use GPL-2-0 (and not "or
> later") you should use:
> 
> 1. at the top, for a c. file:
> // SPDX-License-Identifier: GPL-2.0
> 
> or for a .h file:
> /* SPDX-License-Identifier: GPL-2.0 */
> 
> The doc explains it all. Including the comment style (a topic that has
> been discussed also on list quite bit: Linus had the final word there)
> 
> 2. and in your MODULE_LICENSE macro:
> 
> MODULE_LICENSE("GPL v2");
> 
>  because a MODULE_LICENSE("GPL"); would mean GPL-2.0+ (e.g. or any
> later version) and this would not match your top level license tag.

Thanks, I have done this correctly in my files, but accidentally written it 
wrong in my mail.

Cordinally
Benjamin Warnke


Re: [PATCH 1/5 v4] add compression algorithm zBeWalgo

2018-03-21 Thread Benjamin Warnke
Hi Philippe,


> Am 20.03.2018 um 17:30 schrieb Philippe Ombredanne <pombreda...@nexb.com>:
> 
> Hi Benjamin,
> 
> On Tue, Mar 20, 2018 at 7:04 AM, Benjamin Warnke
> <4bwar...@informatik.uni-hamburg.de> wrote:
>> zBeWalgo is a completely new algorithm - Currently it is not published
>> somewhere else right now, googleing it would not show up any results. The
>> following section describes how the algorithm works.
> 
> 
> 
>> diff --git a/lib/zbewalgo/zbewalgo.c b/lib/zbewalgo/zbewalgo.c
>> new file mode 100644
>> index 0..ef922bc27
>> --- /dev/null
>> +++ b/lib/zbewalgo/zbewalgo.c
>> @@ -0,0 +1,723 @@
>> +/*
>> + * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published 
>> by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but 
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along 
>> with
>> + * this program.
>> + *
> 
> Would you mind using SPDX ids [1] instead of this fine boilerplate
> here and throughout your patches?

Ok, I will use 

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
...

at the top of my files instead of that boilerplate text. And

MODULE_LICENSE("GPL");

at the bottom of the module-files.


> 
> 
>> +MODULE_LICENSE("GPL");
>> +MODULE_DESCRIPTION("zBeWalgo Compression Algorithm");
> 
> Here your MODULE_LICENSE does not match your top level license. See
> module.h [2] for a description of values: GPL would mean "GNU Public
> License v2 or later" whereas your top level license (best expressed
> with SPDX) would mean GPL-2.0 and no other version. To avoid
> confusion, you would need to state the same thing in the
> MODULE_LICENSE and your SPDX tags.

I used the file "crypto/lz4.c" - since it is a compression algorithm too - as 
an example of how to format the licensing text.
Unfortunately there is the same 'error'.
I fixed this error in all of my files in all patches.

Cordially
Benjamin Warnke


[PATCH 4/5 v4] crypto: configurable compression level

2018-03-20 Thread Benjamin Warnke
Most compression algorithms published by the crypto api are supporting
multiple different compression levels. The crypto api currently just
calls these algorithms with their default compression level.

This patch enables the caller to specify the compression level.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/api.c  | 76 +++
 crypto/deflate.c  | 16 +
 crypto/lz4.c  | 16 +
 crypto/lz4hc.c| 13 +---
 crypto/testmgr.c  |  2 +-
 drivers/block/zram/zcomp.c| 10 +++---
 drivers/block/zram/zcomp.h|  3 +-
 drivers/block/zram/zram_drv.c | 24 --
 drivers/block/zram/zram_drv.h |  1 +
 fs/ubifs/compress.c   |  2 +-
 include/linux/crypto.h|  9 +++--
 mm/zswap.c|  2 +-
 net/xfrm/xfrm_ipcomp.c|  3 +-
 13 files changed, 146 insertions(+), 31 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 70a894e52..dadd4dede 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg 
*alg, u32 type,
 }
 EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg,
+  u32 type, u32 mask, int level)
+{
+   struct crypto_tfm *tfm = NULL;
+   unsigned int tfm_size;
+   int err = -ENOMEM;
+
+   tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
+   tfm = kzalloc(tfm_size, GFP_KERNEL);
+   if (!tfm)
+   goto out_err;
+
+   tfm->__crt_alg = alg;
+   if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS)
+   tfm->crt_compress.cot_level = level;
+
+   err = crypto_init_ops(tfm, type, mask);
+   if (err)
+   goto out_free_tfm;
+
+   if (!tfm->exit && alg->cra_init) {
+   err = alg->cra_init(tfm);
+   if (err)
+   goto cra_init_failed;
+   }
+
+   goto out;
+
+cra_init_failed:
+   crypto_exit_ops(tfm);
+out_free_tfm:
+   if (err == -EAGAIN)
+   crypto_shoot_alg(alg);
+   kfree(tfm);
+out_err:
+   tfm = ERR_PTR(err);
+out:
+   return tfm;
+}
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress);
+
 /*
  * crypto_alloc_base - Locate algorithm and allocate transform
  * @alg_name: Name of algorithm
@@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, 
u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
+struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type,
+ u32 mask, int level)
+{
+   struct crypto_tfm *tfm;
+   int err;
+
+   for (;;) {
+   struct crypto_alg *alg;
+
+   alg = crypto_alg_mod_lookup(alg_name, type, mask);
+   if (IS_ERR(alg)) {
+   err = PTR_ERR(alg);
+   goto err;
+   }
+
+   tfm = __crypto_alloc_tfm_compress(alg, type, mask, level);
+   if (!IS_ERR(tfm))
+   return tfm;
+
+   crypto_mod_put(alg);
+   err = PTR_ERR(tfm);
+
+err:
+   if (err != -EAGAIN)
+   break;
+   if (fatal_signal_pending(current)) {
+   err = -EINTR;
+   break;
+   }
+   }
+
+   return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base_compress);
+
 void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
 {
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 4b681a37c..54a2ff21b 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -24,6 +24,7 @@
  * it is not needed for IPCOMP and keeps the code simpler.  It can be
  * implemented if someone wants it.
  */
+
 #include 
 #include 
 #include 
@@ -43,7 +44,7 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level)
 {
int ret = 0;
struct z_stream_s *stream = >comp_stream;
@@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
}
if (format)
-   ret = zlib_deflateInit(stream, 3);
+   ret = zlib_deflateInit(stream, level);
else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   ret = zlib_deflateInit2(stream, level, Z_DEFLATED,
-DEFLATE_DEF_WINBITS,
DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
@@ -109,11 +110,11 @@ static void deflate_decomp_exit(str

[PATCH 5/5 v4] crypto: add flag for unstable encoding

2018-03-20 Thread Benjamin Warnke
The data-format of zBeWalgo, and some other algorithms is unstable. To
identify such unstable algorithms this patch adds a new flag to the
crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/zbewalgo.c  | 2 +-
 include/linux/crypto.h | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
index c8481b872..9f2b07ac2 100644
--- a/crypto/zbewalgo.c
+++ b/crypto/zbewalgo.c
@@ -148,7 +148,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct 
crypto_tfm *tfm,
 
 static struct crypto_alg crypto_alg_zbewalgo = {
.cra_name = "zbewalgo",
-   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING,
.cra_ctxsize = sizeof(struct zbewalgo_ctx),
.cra_module = THIS_MODULE,
.cra_init = zbewalgo_init,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 63420dac0..372893569 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,12 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY0x4000
 
+/*
+ * Set if the algorithm is new and it is likely that the encoding may
+ * change in near future
+ */
+#define CRYPTO_ALG_UNSTABLE_ENCODING   0x8000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
2.14.1



[PATCH 3/5 v4] crypto: add unsafe decompression to api

2018-03-20 Thread Benjamin Warnke
Up to Version 3 of this patch the decompressor of zbewalgo did not verify
that there is no overflow in the output buffer. Now zbewalgo includes a
safe decompressor which does check for buffer overflows and heap-error.
ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version,
which is a bit faster, but does no error checking. These unsafe decompressors
can be applied when the datasource and the whole datapath is trusted.

This patch publishes these existing functions in the crypto-api

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/842.c |  3 ++-
 crypto/compress.c| 10 ++
 crypto/crypto_null.c |  3 ++-
 crypto/deflate.c |  3 ++-
 crypto/lz4.c | 23 ++-
 crypto/lz4hc.c   | 23 ++-
 crypto/lzo.c |  3 ++-
 crypto/testmgr.c | 27 ++-
 crypto/zbewalgo.c| 29 -
 drivers/block/zram/zram_drv.c| 34 +-
 drivers/block/zram/zram_drv.h|  1 +
 drivers/crypto/cavium/zip/zip_main.c |  6 --
 drivers/crypto/nx/nx-842-powernv.c   |  3 ++-
 drivers/crypto/nx/nx-842-pseries.c   |  3 ++-
 include/linux/crypto.h   | 16 
 15 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index bc26dc942..7e74ea26b 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -112,7 +112,8 @@ static struct crypto_alg alg = {
.cra_exit   = crypto842_exit,
.cra_u  = { .compress = {
.coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
+   .coa_decompress = crypto842_decompress,
+   .coa_decompress_unsafe  = crypto842_decompress } }
 };
 
 static struct scomp_alg scomp = {
diff --git a/crypto/compress.c b/crypto/compress.c
index f2d522924..bec796249 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm,
   dlen);
 }
 
+static int crypto_decompress_unsafe(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+u8 *dst, unsigned int *dlen)
+{
+   return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src,
+ slen, dst,
+ dlen);
+}
+
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
struct compress_tfm *ops = >crt_compress;
 
ops->cot_compress = crypto_compress;
ops->cot_decompress = crypto_decompress;
+   ops->cot_decompress_unsafe = crypto_decompress_unsafe;
 
return 0;
 }
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 20ff2c746..6e15e8c0b 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { {
.cra_module =   THIS_MODULE,
.cra_u  =   { .compress = {
.coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
+   .coa_decompress =   null_compress,
+   .coa_decompress_unsafe  =   null_compress } }
 } };
 
 MODULE_ALIAS_CRYPTO("compress_null");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 94ec3b36a..4b681a37c 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -286,7 +286,8 @@ static struct crypto_alg alg = {
.cra_exit   = deflate_exit,
.cra_u  = { .compress = {
.coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
+   .coa_decompress = deflate_decompress,
+   .coa_decompress_unsafe  = deflate_decompress } }
 };
 
 static struct scomp_alg scomp[] = { {
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3..60a1914b7 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned 
int slen,
return 0;
 }
 
+static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen,
+  void *ctx)
+{
+   int out_len = LZ4_decompress_fast(src, dst, *dlen);
+
+   if (out_len < 0)
+   return -EINVAL;
+
+   *dlen = out_len;
+   return 0;
+}
+
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -117,6 +130,13 @@ st

[PATCH 0/5 v4] add compression algorithm zBeWalgo

2018-03-20 Thread Benjamin Warnke
edu/vg/isabeldata/readme.html

zBeWalgo is the only algorithm which can compress this dataset with a noticeable
compressionratio.

Algorithmratiowrite read
842   1.0060.09  1956.26
--hdd--   1.00   134.70   156.62
lz4hc_01  1.00   154.81  1839.37
lz4hc*_01 1.00   154.81  2105.53
lz4hc_10  1.00   157.33  2078.69
lz4hc*_10 1.00   157.33  2113.14
lz4hc_09  1.00   158.50  2018.51
lz4hc*_09 1.00   158.50  2093.65
lz4hc*_02 1.00   159.54  2104.91
lz4hc_02  1.00   159.54  2117.34
lz4hc_03  1.00   161.26  2070.76
lz4hc*_03 1.00   161.26  2107.27
lz4hc*_08 1.00   161.34  2100.74
lz4hc_08  1.00   161.34  2105.26
lz4hc*_04 1.00   161.95  2080.96
lz4hc_04  1.00   161.95  2104.00
lz4hc_05  1.00   162.17  2044.43
lz4hc*_05 1.00   162.17  2101.74
lz4hc*_06 1.00   163.61  2087.19
lz4hc_06  1.00   163.61  2104.61
lz4hc_07  1.00   164.51  2094.78
lz4hc*_07 1.00   164.51  2105.53
lz4_011.00  1134.89  2109.70
lz4*_01   1.00  1134.89  2118.71
lz4*_08   1.00  1141.96  2104.87
lz4_081.00  1141.96  2118.97
lz4_091.00  1145.55  2087.76
lz4*_09   1.00  1145.55  2118.85
lz4_021.00  1157.28  2094.33
lz4*_02   1.00  1157.28  2124.67
lz4*_03   1.00  1194.18  2106.36
lz4_031.00  1194.18  2119.89
lz4_041.00  1195.09  2117.03
lz4*_04   1.00  1195.09  2120.23
lz4*_05   1.00  1225.56  2109.04
lz4_051.00  1225.56  2120.52
lz4*_06   1.00  1261.67  2109.14
lz4_061.00  1261.67  2121.13
lz4*_07   1.00  1270.86  1844.63
lz4_071.00  1270.86  2041.08
lz4_101.00  1305.36  2109.22
lz4*_10   1.00  1305.36  2120.65
lzo   1.00  1338.61  2109.66
zstd_17   1.0313.93  1138.94
zstd_18   1.0314.01  1170.78
zstd_16   1.0327.12  1073.75
zstd_15   1.0343.52  1061.97
zstd_14   1.0349.60  1082.98
zstd_12   1.0355.03  1042.43
zstd_13   1.0355.14  1173.50
zstd_11   1.0355.24  1178.05
zstd_10   1.0370.01  1173.05
zstd_07   1.03   118.10  1041.92
zstd_06   1.03   123.00  1171.59
zstd_05   1.03   124.61  1165.74
zstd_01   1.03   166.80  1005.29
zstd_04   1.03   170.25  1127.75
zstd_03   1.03   171.40  1172.34
zstd_02   1.03   174.08  1017.34
zstd_09   1.03   195.30  1176.82
zstd_08   1.03   195.98  1175.09
deflate_9 1.0530.15   483.55
deflate_8 1.0530.45   466.67
deflate_5 1.0531.25   480.92
deflate_4 1.0531.84   472.81
deflate_7 1.0531.84   484.18
deflate_6 1.0531.94   481.37
deflate_2 1.0533.07   484.09
deflate_3 1.0533.11   463.57
deflate_1 1.0533.19   469.71
zstd_22   1.06 8.89   647.75
zstd_21   1.0610.70   700.11
zstd_20   1.0610.80   723.42
zstd_19   1.0612.41   764.24
zbewalgo* 1.51   146.45   581.43
zbewalgo  1.51   146.45   592.86
zbewalgo'*1.5438.14   120.96
zbewalgo' 1.5438.14   125.81


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

---

-- 
2.14.1



[PATCH 2/5 v4] crypto: add zBeWalgo to crypto-api

2018-03-20 Thread Benjamin Warnke
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by
zram.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/Kconfig|  12 +++
 crypto/Makefile   |   1 +
 crypto/testmgr.c  |  10 +++
 crypto/testmgr.h  | 134 
 crypto/zbewalgo.c | 177 ++
 drivers/block/zram/zcomp.c|   3 +
 drivers/block/zram/zram_drv.h |   4 +-
 7 files changed, 340 insertions(+), 1 deletion(-)
 create mode 100644 crypto/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b75264b09..3ac0d4ca7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1668,6 +1668,18 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "zBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the zBeWalgo compression algorithm. This algorithm
+ accepts only input sizes of at most one page at once.
+ To achieve high compression ratios zbewalgo can call multiple
+ transformation and compression algorithms in a row to optimize
+ the compressed size.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d5e23a142..294075476 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.dec = __VECS(tf_xts_dec_tv_template)
}
}
+   }, {
+   .alg = "zbewalgo",
+   .test = alg_test_comp,
+   .fips_allowed = 1,
+   .suite = {
+   .comp = {
+   .comp = __VECS(zbewalgo_comp_tv_template),
+   .decomp = __VECS(zbewalgo_decomp_tv_template)
+   }
+   }
}, {
.alg = "zlib-deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6044f6906..996d8321e 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -35133,6 +35133,140 @@ static const struct hash_testvec 
bfin_crc_tv_template[] = {
 
 };
 
+static const struct comp_testvec zbewalgo_comp_tv_template[] = {
+   {
+   .inlen  = 512,
+   .outlen = 402,
+   .input  =
+   
"\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d"
+   
"\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d"
+   
"\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d"
+   
"\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d"
+   
"\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d"
+   
"\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d"
+   
"\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d"
+   
"\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d"
+   
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d"
+   
"\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d"
+   
"\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d"
+   
"\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d"
+   
"\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d"
+   
"\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d"
+   
"\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d"
+   
"\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d"
+   
"\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d"
+   
"\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d"
+   
"\x76\x7f\

[PATCH 1/5 v4] add compression algorithm zBeWalgo

2018-03-20 Thread Benjamin Warnke
zBeWalgo is a completely new algorithm - Currently it is not published
somewhere else right now, googleing it would not show up any results. The
following section describes how the algorithm works.

zBeWalgo itself is a container compression algorithm, which can execute
multiple different compression and transformation algorithms after each other.
The execution of different compression algorithms after each other will be
called 'combination' in this description and in the code. Additionally to be
able to execute combinations of algorithms, zBeWalgo can try different
combinations on the same input. This allows high compression ratios on
completely different datasets, which would otherwise require its own
algorithm each. Executing all known combinations on each input page would be
very slow. Therefore the data is compressed at first with that combination,
which was already successful on the last input page. If the compressed data
size of the current page is similar to that of the last page, the compressed
data is returned immediately without even trying the other combinations. Even
if there is no guarantee that consecutive calls to the algorithm belong to
each other, the speed improvement is obvious.

ZRAM uses zsmalloc for the management of the compressed pages. The largest
size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than
that threshold, ZRAM ignores the compression and writes the uncompressed page
instead. As a consequence it is useless to continue compression, if the
algorithm detects, that the data can not be compressed using the current
combination. The threshold for aborting compression can be changed via sysfs at
any time, even if the algorithm is currently in use. If a combination fails to
compress the data, zBeWalgo tries the next combination. If no combination is
able to reduce the data in size, zBeWalgo returns a negative value.

Each combination consists of up to 7 compression and transformation steps.
Combinations can be added and removed at any time via sysfs. Already compressed
Data can always be decompressed, even if the combination used to produce it
does not exist anymore. Technically the user could add up to 256 combinations
concurrently, but that would be very time consuming if the data can not be
compressed.

To be able to build combinations and call different algorithms, all those
algorithms are implementing the same interface. This enables the user to
specify additional combinations while ZRAM is running.

Within the combinations many different algorithms can be used. Some of those
algorithms are published. This patch adds the following algorithms to be used
within the combinations:
- bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and
'D. J. Wheeler' in 1994. This implementation uses counting sort for
sorting the data. Their original paper is online available at:
http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
- mtf: The Move-To-Front algorithm as described by 'M. Burrows' and
'D. J. Wheeler' in the same paper as bwt.
- jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012.
https://arxiv.org/pdf/1209.1045.pdf
- jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive
Bytes can increase the compression ratio, if for example the first
4 Bits of each Byte are zero. If jbe2 is called after mtf, this
happens ofthen.
- rle: Run Length Encoding
- huffman: Huffman encoding
- bewalgo: I invented this algorithm for my bachelors thesis
'Page-Based compression in the Linux Kernel'. This algorithm is
mainly inspired by lz4, focusing on increasing the speed even more,
with the help of page aligned read an write access. To achieve the
page alignment, the input and output data is accessed only in
blocks of 8 Bytes, therefore the encoding of the compressed data is
changed.

https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
algorithm. The input is read in blocks of 8 Bytes. These Blocks
are added to an avl-tree. The avl-tree is mapped directly to an
array. The encoding is a variation of Run Length Encoding using the
indices in the avl-tree as data. The reason for using the tree
with indices is, that the indices can be encoded in less then
8 Bytes each.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 include/linux/zbewalgo.h  |  62 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 129 +
 lib/zbewalgo/BWT.h|  33 +++
 lib/zbewalgo/JBE.c| 215 ++
 lib/zbewalgo/JBE.h|  25 ++
 lib/zbewalgo/JBE2.c   | 232 +++
 lib/zbewalgo/JBE2.h   |  25 ++
 lib/zbewalgo/MTF.c| 133 +
 lib/zbewalgo/MTF.h|  25 ++
 lib/zbewalgo/Ma

Re: [RESEND PATCH v3] crypto: add zBeWalgo compression for zram

2018-03-07 Thread Benjamin Warnke
Hi Eric,


On 06.03.2018 at 23:13, Eric Biggers wrote:
> 
> Hi Benjamin,
> 
> On Tue, Mar 06, 2018 at 09:23:08PM +0100, Benjamin Warnke wrote:
>> Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM
>> compresses each page individually. As a result the compression algorithm is
>> forced to use a very small sliding window. None of the available compression
>> algorithms is designed to achieve high compression ratios with small inputs.
>> 
>> This patch adds a new compression algorithm 'zBeWalgo' to the crypto api. 
>> This
>> algorithm focusses on increasing the capacity of the compressed block-device
>> created by ZRAM. The choice of compression algorithms is always a tradeoff
>> between speed and compression ratio.
>> 
> [...]
>> 
>> Zstd is not in the list of compared algorithms, because it is not available
>> for Zram in the currently available kernel trees.
>> 
> 
> This still isn't a valid excuse for not comparing it to Zstandard.  Zstandard 
> is
> already in the kernel in lib/.  It would only need glue code to wire it up as 
> an
> scomp_alg to the crypto API.  

I'll add benchmarks with Zstandard.

> In contrast you're proposing an entirely new
> algorithm.  Ultimately, by proposing a new algorithm you're on the hook to
> demonstrate that existing algorithms aren't good enough.  Note that many of 
> the
> existing algorithms including Zstandard also have multiple compression levels
> available to choose from.

The crypto api exposes each algorithm only once with its default compression 
level.
Currently there is no switch/option/mechanism/code or something to switch 
thoose levels.
Of course I could benchmark the algorithms with multiple compression levels.
How many / Which compression levels would you like to see to be compared with 
each other?

> It's also rare to add a compression or encryption algorithm to the Linux 
> kernel
> that isn't already used somewhere else.

The kernel is the only place where so many small pieces of data need to be 
compressed.
In user space nobody cares that the data is splitted into pages, and therefore 
compression usually applied to large datasets.

>  Have you at least written a formal
> specification of the 'zBeWalgo' data format?

Not yet.

>  Zstandard, for example, had a
> well-tested and optimized userspace library and a specification of the data
> format before it was added to the kernel.  And even before that it took a 
> couple
> years of development to stabilize the Zstandard data format.
> 
> Now, it's true that you don't strictly need a stable data format if the
> algorithm will only ever be used for RAM compression.  But, if you want to 
> take
> that shortcut, then you're on the hook to justify it, and perhaps to enlighten
> the crypto API about algorithms that don't have a stable data format (e.g. 
> using
> a new CRYPTO_ALG_* flag) so that users have to more explicitly opt-in to using
> them.

I could add CRYPTO_ALG_TYPE_COMPRESS_UNSTABLE as an alias for 
CRYPTO_ALG_TYPE_COMPRESS

> You cannot just ignore fuzz-safety in the decompressor either. The existing
> decompressors exposed through the crypto API are fuzz-safe, so it's not valid 
> to
> compare an unsafe decompressor to them.  If you really do want to add an 
> unsafe
> decompressor then you'd likely need to look into adding crypto API support for
> users to request unsafe decompression -- which could also be used to expose 
> the
> unsafe version of the LZ4 decompressor (LZ4_decompress_fast() instead of
> LZ4_decompress_safe()).  Or if you do make the decompressor safe, then of 
> course
> you'd actually need to fuzz it; I suggest porting the code to userspace and
> running american fuzzy lop (afl-fuzz) on it: http://lcamtuf.coredump.cx/afl/

The current version of the decompressor is fuzz-safe. I tested it with lots of 
data and made sure, that each possible branch in the code is executed multiple 
times in different combinations.

> Separately, given that this is a brand new algorithm and it seems to have
> various corner cases, it would be a good idea to demonstrate a higher 
> guarantee
> of the correctness of the compression-decompression round trip.  afl-fuzz can 
> be
> good for that too: you could port the code to userspace and write a simple
> program which compresses and decompresses a file and aborts if it was 
> corrupted.
> Then by passing the program to afl-fuzz it will eventually cover most of the
> compressor and decompressor.  I've done that when testing compression code in
> the past.

I will port my code to user space and use afl-fuzz to test it.

Benjamin



Re: [RESEND PATCH v3] crypto: add zBeWalgo compression for zram

2018-03-07 Thread Benjamin Warnke
Hello,

On(07/03/2018 03:12),Sergey Senozhatsky wrote:
> 
> Hello,
> 
> On (03/06/18 20:59), Benjamin Warnke wrote:
>>   Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM
>>   compresses each page individually. As a result the compression algorithm
>>   is
>>   forced to use a very small sliding window. None of the available
>>   compression
>>   algorithms is designed to achieve high compression ratios with small
>>   inputs.
> 
> I think you first need to merge zBeWalgo (looks like a long way to go)
> And then add ZRAM support, as a separate patch.

I'll split my patch into 2 parts

1st: add zBeWalgo compression algorithm
2nd: enable zBeWalgo to be used by ZRAM

> 
>>   - 'ecoham' (100 MiB) This dataset is one of the input files for the
>>   scientific
>>   application ECOHAM which runs an ocean simulation. This dataset contains a
>>   lot of zeros. Where the data is not zero there are arrays of floating
>>   point
>>   values, adjacent float values are likely to be similar to each other,
>>   allowing for high compression ratios.
>> 
>>   algorithm | ratio   | compression| decompression
>>   zbewalgo  |   12.94 |  294.10 MBit/s | 1242.59 MBit/s
>>   deflate   |   12.54 |   75.51 MBit/s |  736.39 MBit/s
>>   842   |   12.26 |  182.59 MBit/s |  683.61 MBit/s
>>   lz4hc |   12.00 |   51.23 MBit/s | 1524.73 MBit/s
>>   lz4   |   10.68 | 1334.37 MBit/s | 1603.54 MBit/s
>>   lzo   |9.79 | 1333.76 MBit/s | 1534.63 MBit/s
>> 
>>   - 'source-code' (800 MiB) This dataset is a tarball of the source-code
>>   from a
>>   linux-kernel.
>> 
>>   algorithm | ratio   | compression| decompression
>>   deflate   |3.27 |   42.48 MBit/s |  250.36 MBit/s
>>   lz4hc |2.40 |  104.14 MBit/s | 1150.53 MBit/s
>>   lzo   |2.27 |  444.77 MBit/s |  886.97 MBit/s
>>   lz4   |2.18 |  453.08 MBit/s | 1101.45 MBit/s
>>   842   |1.65 |   64.10 MBit/s |  158.40 MBit/s
>>   zbewalgo  |1.19 |   52.89 MBit/s |  197.58 MBit/s
>> 
>>   - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the
>>   running hpcg-benchmark. At the time of the snapshot, that application
>>   performed a sparse matrix - vector multiplication.
>> 
>>   algorithm | ratio   | compression| decompression
>>   zbewalgo  |   16.16 |  179.97 MBit/s |  468.36 MBit/s
>>   deflate   |9.52 |   65.11 MBit/s |  632.69 MBit/s
>>   lz4hc |4.96 |  193.33 MBit/s | 1607.12 MBit/s
>>   842   |4.20 |  150.99 MBit/s |  316.22 MBit/s
>>   lzo   |4.14 |  922.74 MBit/s |  865.32 MBit/s
>>   lz4   |3.79 |  908.39 MBit/s | 1375.33 MBit/s
>> 
>>   - 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar,
>>   but
>>   not equal. This array is produced by a partial differential equation
>>   solver
>>   using a Jakobi-implementation.
>> 
>>   algorithm | ratio   | compression| decompression
>>   zbewalgo  |1.30 |  203.30 MBit/s |  530.87 MBit/s
>>   deflate   |1.02 |   37.06 MBit/s | 1131.88 MBit/s
>>   lzo   |1.00 | 1741.46 MBit/s | 2012.78 MBit/s
>>   lz4   |1.00 | 1458.08 MBit/s | 2013.88 MBit/s
>>   lz4hc |1.00 |  173.19 MBit/s | 2012.37 MBit/s
>>   842   |1.00 |   64.10 MBit/s | 2013.64 MBit/s
> 
> Hm, mixed feelings.

as Eric Biggers suggested, I'll add Zstandard to the set of algorithms which 
compared. What else should I add to the benchmarks?

Benjamin



[RESEND PATCH v3] crypto: add zBeWalgo compression for zram

2018-03-06 Thread Benjamin Warnke
 invented this algorithm for my bachelors thesis
 'Page-Based compression in the Linux Kernel'. This algorithm is
 mainly inspired by lz4, focusing on increasing the speed even more,
 with the help of page aligned read an write access. To achieve the
 page alignment, the input and output data is accessed only in
 blocks of 8 Bytes, therefore the encoding of the compressed data is
 changed.
 
https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
 algorithm. The input is read in blocks of 8 Bytes. These Blocks
 are added to an avl-tree. The avl-tree is mapped directly to an
 array. The encoding is a variation of Run Length Encoding using the
 indices in the avl-tree as data. The reason for using the tree
 with indices is, that the indices can be encoded in less then
 8 Bytes each.


Benchmarks:


To obtain reproducable benchmarks, the datasets were first loaded into a
userspace-program. Than the data is written directly to a clean
zram-partition without any filesystem. Between writing and reading 'sync'
and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are
wall clock times, and the benchmarks are using only one cpu-core at a time.
The new algorithm is compared to all available compression algorithms from
the crypto-api.

Zstd is not in the list of compared algorithms, because it is not available
for Zram in the currently available kernel trees.

- '/dev/zero' (8 GiB) This dataset is used to measure the speed limitations
for ZRAM. ZRAM filters zero-data internally and does not even call the
specified compression algorithm.

algorithm | compression| decompression
--zram--  | 2724.08 MBit/s | 2828.87 MBit/s


- 'ecoham' (100 MiB) This dataset is one of the input files for the scientific
application ECOHAM which runs an ocean simulation. This dataset contains a
lot of zeros. Where the data is not zero there are arrays of floating point
values, adjacent float values are likely to be similar to each other,
allowing for high compression ratios.

algorithm | ratio   | compression| decompression
zbewalgo  |   12.94 |  294.10 MBit/s | 1242.59 MBit/s
deflate   |   12.54 |   75.51 MBit/s |  736.39 MBit/s
842   |   12.26 |  182.59 MBit/s |  683.61 MBit/s
lz4hc |   12.00 |   51.23 MBit/s | 1524.73 MBit/s
lz4   |   10.68 | 1334.37 MBit/s | 1603.54 MBit/s
lzo   |9.79 | 1333.76 MBit/s | 1534.63 MBit/s


- 'source-code' (800 MiB) This dataset is a tarball of the source-code from a
linux-kernel.

algorithm | ratio   | compression| decompression
deflate   |3.27 |   42.48 MBit/s |  250.36 MBit/s
lz4hc |2.40 |  104.14 MBit/s | 1150.53 MBit/s
lzo   |2.27 |  444.77 MBit/s |  886.97 MBit/s
lz4   |2.18 |  453.08 MBit/s | 1101.45 MBit/s
842   |1.65 |   64.10 MBit/s |  158.40 MBit/s
zbewalgo  |1.19 |   52.89 MBit/s |  197.58 MBit/s


- 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the
running hpcg-benchmark. At the time of the snapshot, that application
performed a sparse matrix - vector multiplication.

algorithm | ratio   | compression| decompression
zbewalgo  |   16.16 |  179.97 MBit/s |  468.36 MBit/s
deflate   |9.52 |   65.11 MBit/s |  632.69 MBit/s
lz4hc |4.96 |  193.33 MBit/s | 1607.12 MBit/s
842   |4.20 |  150.99 MBit/s |  316.22 MBit/s
lzo   |4.14 |  922.74 MBit/s |  865.32 MBit/s
lz4   |3.79 |  908.39 MBit/s | 1375.33 MBit/s


- 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar, but
not equal. This array is produced by a partial differential equation solver
using a Jakobi-implementation.

algorithm | ratio   | compression| decompression
zbewalgo  |1.30 |  203.30 MBit/s |  530.87 MBit/s
deflate   |1.02 |   37.06 MBit/s | 1131.88 MBit/s
lzo   |1.00 | 1741.46 MBit/s | 2012.78 MBit/s
lz4   |1.00 | 1458.08 MBit/s | 2013.88 MBit/s
lz4hc |1.00 |  173.19 MBit/s | 2012.37 MBit/s
842   |1.00 |   64.10 MBit/s | 2013.64 MBit/s


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

Changes since v2:

- added linux-kernel Mailinglist

Changes since v1:

- improved documentation
- improved code style
- replaced numerous casts with get_unaligned*
- added tests in crypto/testmgr.h/c
- added zBeWalgo to the list of algorithms shown by 
  /sys/block/zram0/comp_algorithm

---
crypto/Kconfig|   8 +
crypto/Makefile   |   1 +
crypto/testmgr.c  |  10 +
crypto/testmgr.h  | 134 +
crypto/zbewalgo.c | 176 
drivers/block/zram/zcomp.c|   3 +
drivers/block/zram/zram_drv.h |   4 +-
include/linux/zbewalgo.h  |  53 
lib/Kconfig   |   3 +
lib/Makefile  |   1 +
lib/zbewalgo/BWT.c| 112 
lib

[PATCH v2] crypto: add zBeWalgo compression for zram

2018-02-15 Thread Benjamin Warnke
 encoding
- bewalgo: I invented this algorithm for my bachelors thesis
   'Page-Based compression in the Linux Kernel'. This algorithm is
   mainly inspired by lz4, focusing on increasing the speed even more,
   with the help of page aligned read an write access. To achieve the
   page alignment, the input and output data is accessed only in
   blocks of 8 Bytes, therefore the encoding of the compressed data is
   changed.
   
https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
   algorithm. The input is read in blocks of 8 Bytes. These Blocks
   are added to an avl-tree. The avl-tree is mapped directly to an
   array. The encoding is a variation of Run Length Encoding using the
   indices in the avl-tree as data. The reason for using the tree
   with indices is, that the indices can be encoded in less then
   8 Bytes each.


Benchmarks:


To obtain reproducable benchmarks, the datasets were first loaded into a
userspace-program. Than the data is written directly to a clean
zram-partition without any filesystem. Between writing and reading 'sync'
and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are
wall clock times, and the benchmarks are using only one cpu-core at a time.
The new algorithm is compared to all available compression algorithms from
the crypto-api.

Zstd is not in the list of compared algorithms, because it is not available
for Zram in the currently available kernel trees.

- '/dev/zero' (8 GiB) This dataset is used to measure the speed limitations
for ZRAM. ZRAM filters zero-data internally and does not even call the
specified compression algorithm.

algorithm | compression| decompression
--zram--  | 2724.08 MBit/s | 2828.87 MBit/s


- 'ecoham' (100 MiB) This dataset is one of the input files for the scientific
 application ECOHAM which runs an ocean simulation. This dataset contains a
 lot of zeros. Where the data is not zero there are arrays of floating point
 values, adjacent float values are likely to be similar to each other,
 allowing for high compression ratios.

algorithm | ratio   | compression| decompression
zbewalgo  |   12.94 |  294.10 MBit/s | 1242.59 MBit/s
deflate   |   12.54 |   75.51 MBit/s |  736.39 MBit/s
842   |   12.26 |  182.59 MBit/s |  683.61 MBit/s
lz4hc |   12.00 |   51.23 MBit/s | 1524.73 MBit/s
lz4   |   10.68 | 1334.37 MBit/s | 1603.54 MBit/s
lzo   |9.79 | 1333.76 MBit/s | 1534.63 MBit/s


- 'source-code' (800 MiB) This dataset is a tarball of the source-code from a
 linux-kernel.

algorithm | ratio   | compression| decompression
deflate   |3.27 |   42.48 MBit/s |  250.36 MBit/s
lz4hc |2.40 |  104.14 MBit/s | 1150.53 MBit/s
lzo   |2.27 |  444.77 MBit/s |  886.97 MBit/s
lz4   |2.18 |  453.08 MBit/s | 1101.45 MBit/s
842   |1.65 |   64.10 MBit/s |  158.40 MBit/s
zbewalgo  |1.19 |   52.89 MBit/s |  197.58 MBit/s


- 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the
 running hpcg-benchmark. At the time of the snapshot, that application
 performed a sparse matrix - vector multiplication.

algorithm | ratio   | compression| decompression
zbewalgo  |   16.16 |  179.97 MBit/s |  468.36 MBit/s
deflate   |9.52 |   65.11 MBit/s |  632.69 MBit/s
lz4hc |4.96 |  193.33 MBit/s | 1607.12 MBit/s
842   |4.20 |  150.99 MBit/s |  316.22 MBit/s
lzo   |4.14 |  922.74 MBit/s |  865.32 MBit/s
lz4   |3.79 |  908.39 MBit/s | 1375.33 MBit/s


- 'partdiff' (8 GiB) Array of double values. Adjacent doubles are similar, but
 not equal. This array is produced by a partial differential equation solver
 using a Jakobi-implementation.

algorithm | ratio   | compression| decompression
zbewalgo  |1.30 |  203.30 MBit/s |  530.87 MBit/s
deflate   |1.02 |   37.06 MBit/s | 1131.88 MBit/s
lzo   |1.00 | 1741.46 MBit/s | 2012.78 MBit/s
lz4   |1.00 | 1458.08 MBit/s | 2013.88 MBit/s
lz4hc |1.00 |  173.19 MBit/s | 2012.37 MBit/s
842   |1.00 |   64.10 MBit/s | 2013.64 MBit/s


Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

Changes since v1:

- improved documentation
- improved code style
- replaced numerous casts with get_unaligned*
- added tests in crypto/testmgr.h/c
- added zBeWalgo to the list of algorithms shown by 
/sys/block/zram0/comp_algorithm

---
 crypto/Kconfig|   8 +
 crypto/Makefile   |   1 +
 crypto/testmgr.c  |  10 +
 crypto/testmgr.h  | 134 +
 crypto/zbewalgo.c | 176 
 drivers/block/zram/zcomp.c|   3 +
 drivers/block/zram/zram_drv.h |   4 +-
 include/linux/zbewalgo.h  |  53 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c

Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-31 Thread Benjamin Warnke
Hi,

I am working on a new Version for this patch addressing all comments, and 
following all guidelines.

Best Regards,
Benjamin Warnke


Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-30 Thread Benjamin Warnke
Hi,

I modified my code as suggested by Stephan and Eric.

Moving the code from the header files into *.c source files slowed down the 
compression and decompression speed by a factor of up to 20.
I made no changes to the code itself, that would explain, why it is so much 
slower.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

---
 crypto/Kconfig|   8 +
 crypto/Makefile   |   1 +
 crypto/zbewalgo.c | 208 
 include/linux/zbewalgo.h  |  59 +
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 111 +
 lib/zbewalgo/BWT.h|  34 +++
 lib/zbewalgo/JBE.c| 131 ++
 lib/zbewalgo/JBE.h|  26 ++
 lib/zbewalgo/JBE2.c   | 135 +++
 lib/zbewalgo/JBE2.h   |  26 ++
 lib/zbewalgo/MTF.c|  98 
 lib/zbewalgo/MTF.h|  26 ++
 lib/zbewalgo/Makefile |   4 +
 lib/zbewalgo/RLE.c| 130 ++
 lib/zbewalgo/RLE.h|  26 ++
 lib/zbewalgo/bewalgo.c| 369 +
 lib/zbewalgo/bewalgo.h|  26 ++
 lib/zbewalgo/bewalgo2.c   | 385 ++
 lib/zbewalgo/bewalgo2.h   |  26 ++
 lib/zbewalgo/bitshuffle.c | 101 
 lib/zbewalgo/bitshuffle.h |  26 ++
 lib/zbewalgo/huffman.c| 227 ++
 lib/zbewalgo/huffman.h|  26 ++
 lib/zbewalgo/include.h| 106 +
 lib/zbewalgo/zbewalgo.c   | 590 ++
 27 files changed, 2909 insertions(+)
 create mode 100644 crypto/zbewalgo.c
 create mode 100644 include/linux/zbewalgo.h
 create mode 100644 lib/zbewalgo/BWT.c
 create mode 100644 lib/zbewalgo/BWT.h
 create mode 100644 lib/zbewalgo/JBE.c
 create mode 100644 lib/zbewalgo/JBE.h
 create mode 100644 lib/zbewalgo/JBE2.c
 create mode 100644 lib/zbewalgo/JBE2.h
 create mode 100644 lib/zbewalgo/MTF.c
 create mode 100644 lib/zbewalgo/MTF.h
 create mode 100644 lib/zbewalgo/Makefile
 create mode 100644 lib/zbewalgo/RLE.c
 create mode 100644 lib/zbewalgo/RLE.h
 create mode 100644 lib/zbewalgo/bewalgo.c
 create mode 100644 lib/zbewalgo/bewalgo.h
 create mode 100644 lib/zbewalgo/bewalgo2.c
 create mode 100644 lib/zbewalgo/bewalgo2.h
 create mode 100644 lib/zbewalgo/bitshuffle.c
 create mode 100644 lib/zbewalgo/bitshuffle.h
 create mode 100644 lib/zbewalgo/huffman.c
 create mode 100644 lib/zbewalgo/huffman.h
 create mode 100644 lib/zbewalgo/include.h
 create mode 100644 lib/zbewalgo/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b44c0ae04..f63f543e9 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1667,6 +1667,14 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "ZBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the ZBeWalgo algorithm.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
new file mode 100644
index 0..3d12f9cdf
--- /dev/null
+++ b/crypto/zbewalgo.c
@@ -0,0 +1,208 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2018 Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "linux/zbewalgo.h"
+
+
+struct zbewalgo_ctx {
+   void *zbewalgo_comp_mem;
+};
+
+static void *zbewalgo_alloc_ctx(struct crypto_scomp *tfm)
+{
+   void *ctx;
+
+   ctx = vmalloc(zbewalgo_get_wrkmem_size());
+   if (!ctx)
+   return ERR_PTR(-ENOMEM);
+   return ctx;
+}
+
+static int zbewalgo_init(struct crypto_tfm *tfm)
+{
+   struct zbewalgo_ctx *ctx = crypto_tfm_

Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-30 Thread Benjamin Warnke
Hi Stephan,


>>> In general: I do not think that having larger C functions in header files
>>> is a proper coding style.
>> 
>> How should I solve this?
>> 
>> Option 1:
>> Move everything in the lib/zbewalgo folder into a single source file.
>> This way there is no function defined in a header file.
>> I separated the code into different files because the different partial
>> compression algorithms are independent from each other.
>> 
>> Option 2:
>> Move each header file in its own *.c file, while keeping the
>> function-declarations in the header. If the code is scattered in multiple
>> source files each of the partial algorithms would show up as an independent
>> module. All these modules would load simultaneously with the module
>> zbewalgo_compress The module zbewalgo_compress requires an array of all
>> partial algorithms. This would spam the 'lsmod' list with unneccessary
>> details.
> 
> A module may be compiled from multiple C files. So, moving the code into 
> separate C files and link them into one object / KO should be considered.

I will start moving the code into multiple C files by tomorrow.

>>> Why are there __attribute__ ((unused)) function parameters, such as in
>>> compress_bitshuffle and others?
>> 
>> The zbewalgo algorithm is a container-algorithm for compression functions
>> with the ability to concatenate multiple algorithms. To be able to call any
>> compression algorithm the same way, I defined 'struct zbewalgo_alg' as the
>> interface for all those independent compression algorithms. Some of the
>> partial algorithms do not require all parameters. To silence compiler
>> warnings (if additional warning flags are enabled) I explicitly add the
>> 'unused'-parameter
> 
> Linux does not enable the compiler warning about unused parameters.

This is my first patch request.
I read somewhere, that the code should be compiled with additional warnings 
enabled before submission, to ensure, that the patch does not include useless 
code.
I will remove that attribute.

Benjamin


Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-30 Thread Benjamin Warnke
Hi Eric,

>> Currently ZRAM uses the compression-algorithms from the crypto-api.
>> None of the current compression-algorithms in the crypto-api is designed
>> to compress 4KiB chunks of data efficiently.
>> This patch adds a new compression-algorithm especially designed for ZRAM,
>> to compress small pieces of data more efficiently.
> 
> This is some interesting work, and I like the idea of doing transforms
> specialized for in-memory data.  However, where can I find more information
> about this new compression algorithm?  

I invented this algorithm during the last months for an university lecture. I 
have not published it before, that is why you can not find anything at google.

> What does "zbewalgo" even stand for /
> mean?  Googling it turns up nothing.

zbewalgo: z=zram be=Benjamin wa=Warnke algo=Algorithm

> You are going to have to be much more specific what you mean by "efficiently".
> Efficiency usually implies speed, yet even by your own numbers LZ4 is much
> faster than "zbewalgo", both for compression and decompression.

In this context efficient refers to the compression ratio.

> If the goal is to provide an algorithm more tuned for compression ratio than
> speed in comparison to LZ4, then the omission of Zstandard from your 
> benchmarks
> is strange, especially given that Zstandard is available in the kernel now.

I was not aware of Zstandard in the kernel.

#echo zstandard > /sys/block/zram0/comp_algorithm
returns 
"write error: Invalid argument"

#cat /sys/block/zram0/comp_algorithm
does not show Zstandard

I pulled the latest commit from 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
There is no CONFIG_CRYPTO_ZSTD (or similar) flag in my .config file.

How can I enable Zstandard for use within zram?

> The proposed "zbewalgo" decompressor also doesn't handle invalid inputs, which
> means it cannot be used on untrusted data.  
> This isn't acceptable without
> justification (since people may use it on untrusted data, creating security
> vulnerabilities), and it also makes it not really a fair comparison with LZ4
> since the LZ4 decompressor does handle invalid inputs, at least in the mode
> exposed through the crypto API.

Since zbewalgo is intended for use within zram, the data-source for the 
decompression algorithm is the same computer.
I assume, that the data is never corrupted in any way, because it is never 
exposed to untrusted hardware.

To catch all invalid inputs a lot of branches would be required.
Each additional if-branch in the code for handling untrusted data slows down 
the algorithm.
In the zram context only blocks of 4KiB are going to be compressed.
The zbewalgo algorithm executes multiple (de)compression steps in a row.
Each step would need to fully verify, that the data is valid, even if the 
preceding algorithm does not detect an error.
All together, the relative cpu-time cost for invalid data is high.

I could add a comment to the decompression functions, but that would not be 
visible for the final user.

Benjamin


Re: Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-30 Thread Benjamin Warnke
Hi Stephan,

thanks for your fast response.

> Please run checkpatch.pl on the patch and fix the formatting issues.

I've run checkpatch.pl again and fixed all errors and warnings except the 
warnings about printk.
Compression does not have it's own subsystem, that is why I used 
printk(KERN_INFO ...

> In general: I do not think that having larger C functions in header files is 
> a 
> proper coding style. 

How should I solve this?

Option 1:
Move everything in the lib/zbewalgo folder into a single source file.
This way there is no function defined in a header file.
I separated the code into different files because the different partial 
compression algorithms are independent from each other.

Option 2:
Move each header file in its own *.c file, while keeping the 
function-declarations in the header.
If the code is scattered in multiple source files each of the partial 
algorithms would show up as an independent module.
All these modules would load simultaneously with the module zbewalgo_compress
The module zbewalgo_compress requires an array of all partial algorithms.
This would spam the 'lsmod' list with unneccessary details.

I would prefer option 1, since it adds less overhead.

> Also, having static variables header files is also not 
> nice.

I will remove the static modifier for variables in the header files

> Do not redefine code that already exists. For example, MIN/MAX exists: min_t 
> and max_t.

Ok, I will use min_t and max_t

> Why are there __attribute__ ((unused)) function parameters, such as in 
> compress_bitshuffle and others?

The zbewalgo algorithm is a container-algorithm for compression functions with 
the ability to concatenate multiple algorithms.
To be able to call any compression algorithm the same way, I defined 'struct 
zbewalgo_alg' as the interface for all those independent compression algorithms.
Some of the partial algorithms do not require all parameters. To silence 
compiler warnings (if additional warning flags are enabled) I explicitly add 
the 'unused'-parameter

Best regards,
Benjamin


Subject: [PATCH] crypto: add zbewalgo compression algorithm for zram

2018-01-30 Thread Benjamin Warnke
Currently ZRAM uses the compression-algorithms from the crypto-api.
None of the current compression-algorithms in the crypto-api is designed
to compress 4KiB chunks of data efficiently.
This patch adds a new compression-algorithm especially designed for ZRAM,
to compress small pieces of data more efficiently.

Benchmarks:

To obtain reproduceable benchmarks, the datasets were first loaded into
a userspace-program. Than the data is written directly to a clean
zram-partition without any filesystem. Between writing and reading
'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time
measurements are wall clock times, and the benchmarks are using only
one cpu-core at a time. The new algorithm is compared to all available
compression algorithms from the crypto-api.


- '/dev/zero' (8 GiB) This dataset is used to measure the speed
  limitations for ZRAM. ZRAM filters zero-data internally and does not
  even call the specified compression algorithm.

algorithm | compression   | decompression
 --zram-- | 2913.92 MiB/s | 3116.38 MiB/s


 - 'ecoham' (100 MiB) This dataset is one of the input files for the
  scientific application ECOHAM which runs an ocean simulation. This
  dataset contains a lot of zeros. Where the data is not zero there are
  arrays of floating point values, neighboring float values are likely
  to be similar to each other, allowing high compression ratios.

algorithm | ratio | compression   | decompression
zbewalgo  | 12.77 |  417.35 MiB/s | 1306.21 MiB/s
deflate   | 12.54 |   73.51 MiB/s |  788.85 MiB/s
842   | 12.26 |  199.22 MiB/s |  675.74 MiB/s
lz4hc | 12.00 |   49.99 MiB/s | 1787.79 MiB/s
lz4   | 10.68 | 1356.77 MiB/s | 1690.34 MiB/s
lzo   |  9.79 | 1363.05 MiB/s | 1640.70 MiB/s


 - 'source-code' (800 MiB) This dataset is a tarball of the source-code
  from a linux-kernel.

algorithm | ratio | compression   | decompression
deflate   |  3.27 |   42.36 MiB/s |  256.22 MiB/s
lz4hc |  2.40 |   99.62 MiB/s | 1187.15 MiB/s
lzo   |  2.27 |  432.95 MiB/s |  944.74 MiB/s
lz4   |  2.18 |  444.07 MiB/s | 1159.49 MiB/s
zbewalgo  |  1.89 |   43.44 MiB/s |   35.73 MiB/s
842   |  1.65 |   62.73 MiB/s |  149.71 MiB/s


 - 'hpcg' (8 GiB) This dataset is a (partial) memory-snapshot of the
  running hpcg-benchmark. At the time of the snapshot, that application
  performed a sparse matrix - vector multiplication.

algorithm | ratio | compression   | decompression
zbewalgo  | 15.88 |  174.49 MiB/s |  372.03 MiB/s
deflate   |  9.52 |   64.47 MiB/s |  705.95 MiB/s
lz4hc |  4.96 |  190.97 MiB/s | 1727.96 MiB/s
842   |  4.20 |  144.33 MiB/s |  284.99 MiB/s
lzo   |  4.14 |  910.92 MiB/s | 1272.87 MiB/s
lz4   |  3.79 |  868.92 MiB/s | 1500.85 MiB/s


 - 'partdiff' (8 GiB) Array of double values. Neighbored values are
  similar, but not equal. This array is produced by an partial
  differential equation solver using a Jakobi-implementation.

algorithm | ratio | compression   | decompression
zbewalgo  |  1.17 |  220.36 MiB/s |  678.05 MiB/s
deflate   |  1.02 |   36.87 MiB/s | 1191.34 MiB/s
lzo   |  1.00 | 1727.67 MiB/s | 2103.98 MiB/s
lz4   |  1.00 | 1417.10 MiB/s | 2127.88 MiB/s
lz4hc |  1.00 |  150.14 MiB/s | 2130.08 MiB/s
842   |  1.00 |   63.99 MiB/s | 2131.95 MiB/s


In the category 'compression ratio', the new algorithm zbewalgo is often
the best choice. The faster algorithms with lower compression ratio are
going to fill the zram-partition faster and finally writing their data
to a backing device - which in turn is much slower than imediately using the
zbewalgo algorithm.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>

---
 crypto/Kconfig   |   8 +
 crypto/Makefile  |   1 +
 crypto/zbewalgo.c| 207 +
 include/linux/zbewalgo.h |  59 ++
 lib/Kconfig  |   3 +
 lib/Makefile |   1 +
 lib/zbewalgo/BWT.h   | 116 
 lib/zbewalgo/JBE.h   | 135 ++
 lib/zbewalgo/JBE2.h  | 139 ++
 lib/zbewalgo/MTF.h   | 102 ++
 lib/zbewalgo/Makefile|   3 +
 lib/zbewalgo/RLE.h   | 134 ++
 lib/zbewalgo/bewalgo.h   | 378 ++
 lib/zbewalgo/bewalgo2.h  | 388 +++
 lib/zbewalgo/bitshuffle.h| 105 +++
 lib/zbewalgo/huffman.h   | 232 +++
 lib/zbewalgo/include.h   | 151 +++
 lib/zbewalgo/settings.h  | 220 ++
 lib/zbewalgo/zbewalgo_compress.c | 372 +
 19 files changed, 2754 insertions(+)
 create mode 100644 crypto/zbewalgo.c
 create mode 100644 include/linux/zbewalgo.h
 create mode 100644 lib/zbewalgo/BWT.h
 create mode 100644 lib/zbewalgo/JBE.h
 create mode 100644 lib/zbewalgo/JBE2.h
 cr