This patchset allows to specify dictionary size for compression algorithms.

change since v2:
 - does not touch kite_deflate.c
 - does not add unneeded macro definitions unrelated to on-disk format
 - modify the hacky way to get max_dict_size in z_erofs_build_compr_cfgs()
 - some rename, reorganize and code style fix

Yifan Zhao (3):
  erofs-utils: mkfs: merge erofs_compressor_setlevel() into
    erofs_compressor_init()
  erofs-utils: mkfs: allow to specify dictionary size for compression
    algorithms
  erofs-utils: mkfs: reorganize logic in erofs_compressor_init()

 include/erofs/config.h      |  10 ++--
 lib/compress.c              |  37 +++++++------
 lib/compress_hints.c        |   2 +-
 lib/compressor.c            |  45 +++++++++++-----
 lib/compressor.h            |   9 ++--
 lib/compressor_deflate.c    |  40 ++++++++++----
 lib/compressor_libdeflate.c |  14 +++--
 lib/compressor_liblzma.c    |  54 ++++++++++++-------
 lib/compressor_lz4.c        |   2 -
 lib/compressor_lz4hc.c      |   5 +-
 lib/config.c                |   4 +-
 lib/inode.c                 |   2 +-
 mkfs/main.c                 | 104 ++++++++++++++++++++++++++++--------
 13 files changed, 229 insertions(+), 99 deletions(-)

Interdiff against v2:
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 7dbfd1e..eecf575 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -34,7 +34,7 @@ enum {
 
 #define EROFS_MAX_COMPR_CFGS           64
 
-struct erofs_compr_cfg {
+struct erofs_compr_opts {
        char *alg;
        int level;
        u32 dict_size;
@@ -70,7 +70,7 @@ struct erofs_configure {
        char *c_src_path;
        char *c_blobdev_path;
        char *c_compress_hints_file;
-       struct erofs_compr_cfg c_compr_opts[EROFS_MAX_COMPR_CFGS];
+       struct erofs_compr_opts c_compr_opts[EROFS_MAX_COMPR_CFGS];
        char c_force_inodeversion;
        char c_force_chunkformat;
        /* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 72f0ca6..eba6c26 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -322,7 +322,6 @@ struct z_erofs_lzma_cfgs {
        u8 reserved[8];
 } __packed;
 
-#define Z_EROFS_LZMA_DEFAULT_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 #define Z_EROFS_LZMA_MAX_DICT_SIZE     (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 
 /* 6 bytes (+ length field = 8 bytes) */
@@ -331,9 +330,6 @@ struct z_erofs_deflate_cfgs {
        u8 reserved[5];
 } __packed;
 
-#define Z_EROFS_DEFLATE_DEFULT_DICT_SIZE       (1U << 15)
-#define Z_EROFS_DEFLATE_MAX_DICT_SIZE          (1U << 15)
-
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
diff --git a/lib/compress.c b/lib/compress.c
index b7ee9ec..ea9d00d 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -1124,11 +1124,10 @@ err_free_meta:
 
 static int z_erofs_build_compr_cfgs(struct erofs_sb_info *sbi,
                                    struct erofs_buffer_head *sb_bh,
-                                   struct erofs_compress_cfg *ccfg)
+                                   u32 *max_dict_size)
 {
        struct erofs_buffer_head *bh = sb_bh;
-       int i, ret = 0;
-       u32 dict_size = 0;
+       int ret = 0;
 
        if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZ4)) {
                struct {
@@ -1155,22 +1154,15 @@ static int z_erofs_build_compr_cfgs(struct 
erofs_sb_info *sbi,
        }
 #ifdef HAVE_LIBLZMA
        if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_LZMA)) {
-               for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
-                       if (ccfg[i].enable &&
-                           ccfg[i].algorithmtype == Z_EROFS_COMPRESSION_LZMA) {
-                               dict_size = ccfg[i].handle.dict_size;
-                               break;
-                       }
-               }
-               DBG_BUGON(!dict_size);
-
                struct {
                        __le16 size;
                        struct z_erofs_lzma_cfgs lzma;
                } __packed lzmaalg = {
                        .size = cpu_to_le16(sizeof(struct z_erofs_lzma_cfgs)),
                        .lzma = {
-                               .dict_size = cpu_to_le32(dict_size),
+                               .dict_size = cpu_to_le32(
+                                       max_dict_size
+                                               [Z_EROFS_COMPRESSION_LZMA]),
                        }
                };
 
@@ -1186,23 +1178,15 @@ static int z_erofs_build_compr_cfgs(struct 
erofs_sb_info *sbi,
        }
 #endif
        if (sbi->available_compr_algs & (1 << Z_EROFS_COMPRESSION_DEFLATE)) {
-               for (i = 0; i < EROFS_MAX_COMPR_CFGS; i++) {
-                       if (ccfg[i].enable &&
-                           ccfg[i].algorithmtype == 
Z_EROFS_COMPRESSION_DEFLATE) {
-                               dict_size = ccfg[i].handle.dict_size;
-                               break;
-                       }
-               }
-               DBG_BUGON(!dict_size);
-
                struct {
                        __le16 size;
                        struct z_erofs_deflate_cfgs z;
                } __packed zalg = {
                        .size = cpu_to_le16(sizeof(struct 
z_erofs_deflate_cfgs)),
                        .z = {
-                               .windowbits =
-                                       cpu_to_le32(ilog2(dict_size)),
+                               .windowbits = cpu_to_le32(ilog2(
+                                       max_dict_size
+                                               [Z_EROFS_COMPRESSION_DEFLATE])),
                        }
                };
 
@@ -1222,6 +1206,7 @@ static int z_erofs_build_compr_cfgs(struct erofs_sb_info 
*sbi,
 int z_erofs_compress_init(struct erofs_sb_info *sbi, struct erofs_buffer_head 
*sb_bh)
 {
        int i, ret;
+       u32 max_dict_size[Z_EROFS_COMPRESSION_MAX];
 
        for (i = 0; cfg.c_compr_opts[i].alg; ++i) {
                struct erofs_compress *c = &erofs_ccfg[i].handle;
@@ -1238,6 +1223,9 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, 
struct erofs_buffer_head *s
                sbi->available_compr_algs |= 1 << erofs_ccfg[i].algorithmtype;
                if (erofs_ccfg[i].algorithmtype != Z_EROFS_COMPRESSION_LZ4)
                        erofs_sb_set_compr_cfgs(sbi);
+               max_dict_size[erofs_ccfg[i].algorithmtype] =
+                       max(max_dict_size[erofs_ccfg[i].algorithmtype],
+                           c->dict_size);
        }
 
        /*
@@ -1271,7 +1259,7 @@ int z_erofs_compress_init(struct erofs_sb_info *sbi, 
struct erofs_buffer_head *s
        }
 
        if (erofs_sb_has_compr_cfgs(sbi))
-               return z_erofs_build_compr_cfgs(sbi, sb_bh, erofs_ccfg);
+               return z_erofs_build_compr_cfgs(sbi, sb_bh, max_dict_size);
        return 0;
 }
 
diff --git a/lib/compressor.c b/lib/compressor.c
index 27b4077..290746e 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -102,26 +102,28 @@ int erofs_compressor_init(struct erofs_sb_info *sbi, 
struct erofs_compress *c,
 
                if (erofs_algs[i].c->setlevel) {
                        ret = erofs_algs[i].c->setlevel(c, compression_level);
-                       if (ret)
-                               return ret;
-               } else {
-                       if (compression_level >= 0)
-                               erofs_warn(
-                                       "compression level %d is ignored for 
%s",
+                       if (ret) {
+                               erofs_err("failed to set compression level %d 
for %s",
                                          compression_level, alg_name);
-                       c->compression_level = 0;
+                               return ret;
+                       }
+               } else if (compression_level >= 0) {
+                       erofs_err("compression level is not supported for %s",
+                                 alg_name);
+                       return -EINVAL;
                }
 
                if (erofs_algs[i].c->setdictsize) {
                        ret = erofs_algs[i].c->setdictsize(c, dict_size);
-                       if (ret)
-                               return ret;
-               } else {
-                       if (dict_size)
-                               erofs_warn(
-                                       "dictionary size %u is ignored for %s",
+                       if (ret) {
+                               erofs_err("failed to set dict size %u for %s",
                                          dict_size, alg_name);
-                       c->dict_size = 0;
+                               return ret;
+                       }
+               } else if (dict_size) {
+                       erofs_err("dict size is not supported for %s",
+                                 alg_name);
+                       return -EINVAL;
                }
 
                ret = erofs_algs[i].c->init(c);
diff --git a/lib/compressor_deflate.c b/lib/compressor_deflate.c
index d9f8a91..479ad56 100644
--- a/lib/compressor_deflate.c
+++ b/lib/compressor_deflate.c
@@ -84,8 +84,8 @@ static int erofs_compressor_deflate_setdictsize(struct 
erofs_compress *c,
 const struct erofs_compressor erofs_compressor_deflate = {
        .default_level = 1,
        .best_level = 9,
-       .default_dictsize = Z_EROFS_DEFLATE_DEFULT_DICT_SIZE,
-       .max_dictsize = Z_EROFS_DEFLATE_MAX_DICT_SIZE,
+       .default_dictsize = 1 << 15,
+       .max_dictsize = 1 << 15,
        .init = compressor_deflate_init,
        .exit = compressor_deflate_exit,
        .setlevel = erofs_compressor_deflate_setlevel,
diff --git a/lib/compressor_liblzma.c b/lib/compressor_liblzma.c
index 0203a5c..b8ae29c 100644
--- a/lib/compressor_liblzma.c
+++ b/lib/compressor_liblzma.c
@@ -73,7 +73,8 @@ static int erofs_compressor_liblzma_setdictsize(struct 
erofs_compress *c,
        if (dict_size == 0)
                dict_size = erofs_compressor_lzma.default_dictsize;
 
-       if (dict_size > erofs_compressor_lzma.max_dictsize) {
+       if (dict_size > erofs_compressor_lzma.max_dictsize ||
+           dict_size < 4096) {
                erofs_err("invalid dict size %u", dict_size);
                return -EINVAL;
        }
@@ -111,7 +112,7 @@ static int erofs_compressor_liblzma_init(struct 
erofs_compress *c)
 const struct erofs_compressor erofs_compressor_lzma = {
        .default_level = LZMA_PRESET_DEFAULT,
        .best_level = 109,
-       .default_dictsize = Z_EROFS_LZMA_DEFAULT_DICT_SIZE,
+       .default_dictsize = 8 * Z_EROFS_PCLUSTER_MAX_SIZE,
        .max_dictsize = Z_EROFS_LZMA_MAX_DICT_SIZE,
        .init = erofs_compressor_liblzma_init,
        .exit = erofs_compressor_liblzma_exit,
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
index 2357f76..8667954 100644
--- a/lib/kite_deflate.c
+++ b/lib/kite_deflate.c
@@ -5,7 +5,6 @@
  * Copyright (C) 2023, Alibaba Cloud
  * Copyright (C) 2023, Gao Xiang <[email protected]>
  */
-#include "erofs/internal.h"
 #include "erofs/defs.h"
 #include "erofs/print.h"
 #include <errno.h>
@@ -23,7 +22,7 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
 #define kite_dbg(x, ...)
 #endif
 
-#define kHistorySize32         Z_EROFS_DEFLATE_DEFULT_DICT_SIZE
+#define kHistorySize32         (1U << 15)
 
 #define kNumLenSymbols32       256
 #define kNumLenSymbolsMax      kNumLenSymbols32
diff --git a/mkfs/main.c b/mkfs/main.c
index 8a9c3cc..acb2108 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -310,43 +310,31 @@ handle_fragment:
        return 0;
 }
 
-static int mkfs_parse_compress_algs(char *algs)
+static int mkfs_parse_one_compress_alg(char *alg,
+                                      struct erofs_compr_opts *copts)
 {
-       unsigned int i;
-       char *s;
-
-       for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
        char *p, *q, *opt, *endptr;
-               struct erofs_compr_cfg *ccfg;
 
-               if (i >= EROFS_MAX_COMPR_CFGS - 1) {
-                       erofs_err("too many algorithm types");
-                       return -EINVAL;
-               }
+       copts->level = -1;
+       copts->dict_size = 0;
 
-               ccfg = cfg.c_compr_opts + i;
-
-               ccfg->level = -1;
-               ccfg->dict_size = 0;
-
-               p = strchr(s, ',');
+       p = strchr(alg, ',');
        if (p) {
-                       ccfg->alg = strndup(s, p - s);
+               copts->alg = strndup(alg, p - alg);
 
-                       /* backward compatibility */
+               /* support old '-zlzma,9' form */
                if (isdigit(*(p + 1))) {
-                               ccfg->level = strtol(p + 1, &endptr, 10);
+                       copts->level = strtol(p + 1, &endptr, 10);
                        if (*endptr && *endptr != ',') {
-                                       erofs_err(
-                                               "invalid compression level %s",
+                               erofs_err("invalid compression level %s",
                                          p + 1);
                                return -EINVAL;
                        }
-                               continue;
+                       return 0;
                }
        } else {
-                       ccfg->alg = strdup(s);
-                       continue;
+               copts->alg = strdup(alg);
+               return 0;
        }
 
        opt = p + 1;
@@ -357,26 +345,20 @@ static int mkfs_parse_compress_algs(char *algs)
 
                if ((p = strstr(opt, "level="))) {
                        p += strlen("level=");
-                               ccfg->level = strtol(p, &endptr, 10);
-                               if ((endptr == p) ||
-                                   (*endptr && *endptr != ',')) {
-                                       erofs_err(
-                                               "invalid compression level %s",
-                                               p);
+                       copts->level = strtol(p, &endptr, 10);
+                       if ((endptr == p) || (*endptr && *endptr != ',')) {
+                               erofs_err("invalid compression level %s", p);
                                return -EINVAL;
                        }
                } else if ((p = strstr(opt, "dictsize="))) {
                        p += strlen("dictsize=");
-                               ccfg->dict_size = strtoul(p, &endptr, 10);
+                       copts->dict_size = strtoul(p, &endptr, 10);
                        if (*endptr == 'k' || *endptr == 'K')
-                                       ccfg->dict_size <<= 10;
+                               copts->dict_size <<= 10;
                        else if (*endptr == 'm' || *endptr == 'M')
-                                       ccfg->dict_size <<= 20;
-                               else if ((endptr == p) ||
-                                        (*endptr && *endptr != ',')) {
-                                       erofs_err(
-                                               "invalid compression dictsize 
%s",
-                                               p);
+                               copts->dict_size <<= 20;
+                       else if ((endptr == p) || (*endptr && *endptr != ',')) {
+                               erofs_err("invalid compression dictsize %s", p);
                                return -EINVAL;
                        }
                } else {
@@ -384,11 +366,27 @@ static int mkfs_parse_compress_algs(char *algs)
                        return -EINVAL;
                }
 
-                       if (q)
-                               opt = q + 1;
-                       else
-                               opt = NULL;
+               opt = q ? q + 1 : NULL;
        }
+
+       return 0;
+}
+
+static int mkfs_parse_compress_algs(char *algs)
+{
+       unsigned int i;
+       char *s;
+       int ret;
+
+       for (s = strtok(algs, ":"), i = 0; s; s = strtok(NULL, ":"), ++i) {
+               if (i >= EROFS_MAX_COMPR_CFGS - 1) {
+                       erofs_err("too many algorithm types");
+                       return -EINVAL;
+               }
+
+               ret = mkfs_parse_one_compress_alg(s, &cfg.c_compr_opts[i]);
+               if (ret)
+                       return ret;
        }
        return 0;
 }
-- 
2.43.0

Reply via email to