Add a missing change: - change to generate a ctx for duplicate fragment in compression.
On Thu, 1 Dec 2022 19:16:15 +0800 Yue Hu <[email protected]> wrote: > From: Yue Hu <[email protected]> > > Previously, there's no fragment deduplication when this feature is > introduced. Let's support it now. > > We intend to dedupe the fragments before compression, so that duplicate > parts will not be written into packed inode. > > With this patch, for Linux 5.10.1 + 5.10.87 source code: > > [before] > 32k pcluster + T0 + lz4hc,12 + fragment 450M > 64k pcluster + T0 + lz4hc,12 + fragment 434M > 128k pcluster + T0 + lz4hc,12 + fragment 426M > 32k pcluster + T0 + lz4hc,12 + fragment + dedupe 368M > 64k pcluster + T0 + lz4hc,12 + fragment + dedupe 380M > 128k pcluster + T0 + lz4hc,12 + fragment + dedupe 395M > > [after] > 32k pcluster + T0 + lz4hc,12 + fragment 311M > 64k pcluster + T0 + lz4hc,12 + fragment 295M > 128k pcluster + T0 + lz4hc,12 + fragment 287M > 32k pcluster + T0 + lz4hc,12 + fragment + dedupe 286M > 64k pcluster + T0 + lz4hc,12 + fragment + dedupe 281M > 128k pcluster + T0 + lz4hc,12 + fragment + dedupe 278M > > Tested on SquashFS (which uses level 12 by default for lz4hc): > > 32k block + lz4hc 332M > 64k block + lz4hc 304M > 128k block + lz4hc 283M > 256k block + lz4hc 273M > 256k block + lz4hc + noI 278M > > Suggested-by: Gao Xiang <[email protected]> > Signed-off-by: Yue Hu <[email protected]> > --- > v4: > - renaming include tofcrc/new_fragmentsize > - move fixup into ctx > - use may_fixing to check packing fragment or not > - move sb/inode flag + 64bits case from erofs_pack_fragments() to new > helper erofs_fragments_commit() > - move recompress ahead of may_inline case when compressing succeeds > - update commit message/code comments > - note that decompress will fail when enable ztailpacking at the same > time, need some time to debug > > v3: > - modify acrroding to Xiang's comments in v2 > - simplify the logic in vle_compress_one > - fix the crash for 1MB pcluster > > v2: mainly improve the logic in compression > > include/erofs/fragments.h | 4 +- > lib/compress.c | 134 +++++++++++++++++++++----- > lib/fragments.c | 197 ++++++++++++++++++++++++++++++++++++-- > 3 files changed, 305 insertions(+), 30 deletions(-) > > diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h > index 5444384..e91fb31 100644 > --- a/include/erofs/fragments.h > +++ b/include/erofs/fragments.h > @@ -15,8 +15,10 @@ extern "C" > extern const char *frags_packedname; > #define EROFS_PACKED_INODE frags_packedname > > +int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 > *tofcrc_r); > int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, > - unsigned int len); > + unsigned int len, u32 tofcrc); > +void z_erofs_fragments_commit(struct erofs_inode *inode); > struct erofs_inode *erofs_mkfs_build_fragments(void); > int erofs_fragments_init(void); > void erofs_fragments_exit(void); > diff --git a/lib/compress.c b/lib/compress.c > index 17b3213..afd6e8e 100644 > --- a/lib/compress.c > +++ b/lib/compress.c > @@ -33,6 +33,11 @@ struct z_erofs_vle_compress_ctx { > unsigned int head, tail; > erofs_blk_t blkaddr; /* pointing to the next blkaddr */ > u16 clusterofs; > + > + u32 tofcrc; > + unsigned int pclustersize; > + erofs_off_t remaining; > + bool need_fix; > }; > > #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ > @@ -162,10 +167,10 @@ static void z_erofs_write_indexes(struct > z_erofs_vle_compress_ctx *ctx) > ctx->clusterofs = clusterofs + count; > } > > -static int z_erofs_compress_dedupe(struct erofs_inode *inode, > - struct z_erofs_vle_compress_ctx *ctx, > +static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx, > unsigned int *len) > { > + struct erofs_inode *inode = ctx->inode; > int ret = 0; > > do { > @@ -301,10 +306,6 @@ static void tryrecompress_trailing(void *in, unsigned > int *insize, > unsigned int count; > int ret = *compressedsize; > > - /* no need to recompress */ > - if (!(ret & (EROFS_BLKSIZ - 1))) > - return; > - > count = *insize; > ret = erofs_compress_destsize(&compresshandle, > in, &count, (void *)tmp, > @@ -319,30 +320,69 @@ static void tryrecompress_trailing(void *in, unsigned > int *insize, > *compressedsize = ret; > } > > -static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx, > - bool final) > +static void z_erofs_fragments_dedupe_update(struct z_erofs_vle_compress_ctx > *ctx, > + unsigned int *len) > +{ > + struct erofs_inode *inode = ctx->inode; > + const unsigned int new_fragmentsize = ctx->remaining + *len; > + > + DBG_BUGON(!inode->fragment_size); > + > + /* try to fix it again if it gets larger (should be rare) */ > + if (inode->fragment_size < new_fragmentsize) { > + ctx->pclustersize = > + roundup(new_fragmentsize - inode->fragment_size, > + EROFS_BLKSIZ); > + return; > + } > + > + inode->fragmentoff += inode->fragment_size - new_fragmentsize; > + inode->fragment_size = new_fragmentsize; > + > + erofs_dbg("Reducing fragment size to %u at %lu", > + inode->fragment_size, inode->fragmentoff); > + > + /* it's ending */ > + ctx->head += new_fragmentsize; > + ctx->remaining = 0; > + *len = 0; > +} > + > +static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx) > { > static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ]; > struct erofs_inode *inode = ctx->inode; > char *const dst = dstbuf + EROFS_BLKSIZ; > struct erofs_compress *const h = &compresshandle; > unsigned int len = ctx->tail - ctx->head; > + bool final = !ctx->remaining; > int ret; > > while (len) { > - unsigned int pclustersize = > - z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ; > bool may_inline = (cfg.c_ztailpacking && final); > bool may_packing = (cfg.c_fragments && final && > !erofs_is_packed_inode(inode)); > + bool may_fixing = ctx->need_fix; > > - if (z_erofs_compress_dedupe(inode, ctx, &len) && !final) > + if (z_erofs_compress_dedupe(ctx, &len) && !final) > break; > > - if (len <= pclustersize) { > + if (len <= ctx->pclustersize) { > + if (may_fixing && !len) { > + may_packing = false; > + goto frag_nopacking; > + } > if (!final || !len) > break; > if (may_packing) { > + if (inode->fragment_size && !may_fixing) { > + ctx->remaining = inode->fragment_size; > + ctx->pclustersize = > + roundup(len, EROFS_BLKSIZ); > + ctx->e.length = 0; > + ctx->need_fix = true; > + break; > + } > ctx->e.length = len; > goto frag_packing; > } > @@ -353,7 +393,7 @@ static int vle_compress_one(struct > z_erofs_vle_compress_ctx *ctx, > ctx->e.length = min(len, > cfg.c_max_decompressed_extent_bytes); > ret = erofs_compress_destsize(h, ctx->queue + ctx->head, > - &ctx->e.length, dst, pclustersize, > + &ctx->e.length, dst, ctx->pclustersize, > !(final && len == ctx->e.length)); > if (ret <= 0) { > if (ret != -EAGAIN) { > @@ -385,15 +425,17 @@ nocompression: > ctx->e.compressedblks = 1; > ctx->e.raw = true; > } else if (may_packing && len == ctx->e.length && > - ret < pclustersize) { > + ret < ctx->pclustersize && (!inode->fragment_size || > + may_fixing)) { > frag_packing: > ret = z_erofs_pack_fragments(inode, > ctx->queue + ctx->head, > - len); > + len, ctx->tofcrc); > if (ret < 0) > return ret; > ctx->e.compressedblks = 0; /* indicate a fragment */ > ctx->e.raw = true; > + may_fixing = false; > /* tailpcluster should be less than 1 block */ > } else if (may_inline && len == ctx->e.length && > ret < EROFS_BLKSIZ) { > @@ -415,7 +457,27 @@ frag_packing: > } else { > unsigned int tailused, padding; > > - if (may_inline && len == ctx->e.length) > + tailused = ret & (EROFS_BLKSIZ - 1); > + /* > + * If there's a space left for the last round when > + * deduping fragments, try to read fragment and > + * recompress a litte more to check whether it can be > + * filled up. Fix the fragment if succeeds. Otherwise, > + * just drop it and go to packing. > + */ > + if (may_packing && len == ctx->e.length && tailused && > + ctx->tail < sizeof(ctx->queue)) { > + DBG_BUGON(!inode->fragment_size); > + > + ctx->remaining = inode->fragment_size; > + ctx->pclustersize = > + BLK_ROUND_UP(ret) * EROFS_BLKSIZ; > + ctx->e.length = 0; > + ctx->need_fix = true; > + break; > + } > + > + if (may_inline && len == ctx->e.length && tailused) > tryrecompress_trailing(ctx->queue + ctx->head, > &ctx->e.length, dst, &ret); > > @@ -454,6 +516,21 @@ frag_packing: > ctx->head += ctx->e.length; > len -= ctx->e.length; > > + if (may_fixing) > + z_erofs_fragments_dedupe_update(ctx, &len); > + > + /* generate a ctx for duplicate fragment */ > + if (final && !len && inode->fragment_size && !may_packing) { > +frag_nopacking: > + z_erofs_write_indexes(ctx); > + > + ctx->e.length = inode->fragment_size; > + ctx->e.compressedblks = 0; > + ctx->e.raw = true; > + ctx->e.partial = false; > + ctx->e.blkaddr = ctx->blkaddr; > + } > + > if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { > const unsigned int qh_aligned = > round_down(ctx->head, EROFS_BLKSIZ); > @@ -736,7 +813,6 @@ int erofs_write_compressed_file(struct erofs_inode > *inode, int fd) > { > struct erofs_buffer_head *bh; > static struct z_erofs_vle_compress_ctx ctx; > - erofs_off_t remaining; > erofs_blk_t blkaddr, compressed_blocks; > unsigned int legacymetasize; > int ret; > @@ -775,6 +851,16 @@ int erofs_write_compressed_file(struct erofs_inode > *inode, int fd) > inode->idata_size = 0; > inode->fragment_size = 0; > > + /* > + * Dedupe fragments before compression to avoid writing duplicate parts > + * into packed inode. > + */ > + if (cfg.c_fragments && !erofs_is_packed_inode(inode)) { > + ret = z_erofs_fragments_dedupe(inode, fd, &ctx.tofcrc); > + if (ret < 0) > + goto err_bdrop; > + } > + > blkaddr = erofs_mapbh(bh->block); /* start_blkaddr */ > ctx.inode = inode; > ctx.blkaddr = blkaddr; > @@ -782,10 +868,12 @@ int erofs_write_compressed_file(struct erofs_inode > *inode, int fd) > ctx.head = ctx.tail = 0; > ctx.clusterofs = 0; > ctx.e.length = 0; > - remaining = inode->i_size; > + ctx.pclustersize = z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ; > + ctx.remaining = inode->i_size - inode->fragment_size; > + ctx.need_fix = false; > > - while (remaining) { > - const u64 readcount = min_t(u64, remaining, > + while (ctx.remaining) { > + const u64 readcount = min_t(u64, ctx.remaining, > sizeof(ctx.queue) - ctx.tail); > > ret = read(fd, ctx.queue + ctx.tail, readcount); > @@ -793,10 +881,10 @@ int erofs_write_compressed_file(struct erofs_inode > *inode, int fd) > ret = -errno; > goto err_bdrop; > } > - remaining -= readcount; > + ctx.remaining -= readcount; > ctx.tail += readcount; > > - ret = vle_compress_one(&ctx, !remaining); > + ret = vle_compress_one(&ctx); > if (ret) > goto err_free_idata; > } > @@ -807,6 +895,8 @@ int erofs_write_compressed_file(struct erofs_inode > *inode, int fd) > DBG_BUGON(compressed_blocks < !!inode->idata_size); > compressed_blocks -= !!inode->idata_size; > > + z_erofs_fragments_commit(inode); > + > z_erofs_write_indexes(&ctx); > z_erofs_write_indexes_final(&ctx); > legacymetasize = ctx.metacur - compressmeta; > diff --git a/lib/fragments.c b/lib/fragments.c > index b8c37d5..e50937c 100644 > --- a/lib/fragments.c > +++ b/lib/fragments.c > @@ -10,17 +10,181 @@ > #include "erofs/inode.h" > #include "erofs/compress.h" > #include "erofs/print.h" > +#include "erofs/internal.h" > #include "erofs/fragments.h" > > +struct erofs_fragment_dedupe_item { > + struct list_head list; > + unsigned int length, nr_dup; > + erofs_off_t pos; > + u8 data[]; > +}; > + > +#define FRAGMENT_HASHTABLE_SIZE 65536 > +#define FRAGMENT_HASH(crc) (crc & (FRAGMENT_HASHTABLE_SIZE - 1)) > + > +static struct list_head dupli_frags[FRAGMENT_HASHTABLE_SIZE]; > +static unsigned int len_to_hash; /* the fragment length for crc32 hash */ > + > static FILE *packedfile; > const char *frags_packedname = "packed_file"; > > -int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, > - unsigned int len) > +static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd, > + u32 crc) > { > - inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; > - inode->fragmentoff = ftell(packedfile); > - inode->fragment_size = len; > + struct erofs_fragment_dedupe_item *cur, *di = NULL; > + struct list_head *head; > + u8 *data; > + unsigned int length; > + int ret; > + > + head = &dupli_frags[FRAGMENT_HASH(crc)]; > + if (list_empty(head)) > + return 0; > + > + /* XXX: no need to read so much for smaller? */ > + if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ) > + length = inode->i_size; > + else > + length = EROFS_CONFIG_COMPR_MAX_SZ; > + > + data = malloc(length); > + if (!data) > + return -ENOMEM; > + > + ret = lseek(fd, inode->i_size - length, SEEK_SET); > + if (ret == -1) { > + ret = -errno; > + goto out; > + } > + > + ret = read(fd, data, length); > + if (ret != length) { > + ret = -errno; > + goto out; > + } > + > + list_for_each_entry(cur, head, list) { > + unsigned int e1, e2, i = 0; > + > + DBG_BUGON(cur->length < len_to_hash + 1); > + DBG_BUGON(length < len_to_hash + 1); > + > + e1 = cur->length - len_to_hash - 1; > + e2 = length - len_to_hash - 1; > + > + if (memcmp(cur->data + e1 + 1, data + e2 + 1, len_to_hash)) > + continue; > + > + while (i <= min(e1, e2) && cur->data[e1 - i] == data[e2 - i]) > + i++; > + > + if (i && (!di || i + len_to_hash > di->nr_dup)) { > + cur->nr_dup = i + len_to_hash; > + di = cur; > + > + /* full match */ > + if (i == min(e1, e2) + 1) > + break; > + } > + } > + if (!di) > + goto out; > + > + DBG_BUGON(di->length < di->nr_dup); > + > + inode->fragment_size = di->nr_dup; > + inode->fragmentoff = di->pos + di->length - di->nr_dup; > + > + erofs_dbg("Dedupe %u fragment data at %lu", inode->fragment_size, > + inode->fragmentoff); > +out: > + free(data); > + return ret; > +} > + > +int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 > *tofcrc_r) > +{ > + u8 data_to_hash[len_to_hash]; > + u32 crc; > + int ret; > + > + if (inode->i_size <= len_to_hash) > + return 0; > + > + ret = lseek(fd, inode->i_size - len_to_hash, SEEK_SET); > + if (ret == -1) > + return -errno; > + > + ret = read(fd, data_to_hash, len_to_hash); > + if (ret != len_to_hash) > + return -errno; > + > + crc = erofs_crc32c(~0, data_to_hash, len_to_hash); > + *tofcrc_r = crc; > + > + ret = z_erofs_fragments_dedupe_find(inode, fd, crc); > + if (ret < 0) > + return ret; > + > + ret = lseek(fd, 0, SEEK_SET); > + if (ret == -1) > + return -errno; > + return 0; > +} > + > +static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len, > + erofs_off_t pos, u32 crc) > +{ > + struct erofs_fragment_dedupe_item *di; > + > + if (len <= len_to_hash) > + return 0; > + > + di = malloc(sizeof(*di) + len); > + if (!di) > + return -ENOMEM; > + > + memcpy(di->data, data, len); > + di->length = len; > + di->pos = pos; > + di->nr_dup = 0; > + > + list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]); > + return 0; > +} > + > +static inline void z_erofs_fragments_dedupe_init(unsigned int clen) > +{ > + unsigned int i; > + > + for (i = 0; i < FRAGMENT_HASHTABLE_SIZE; ++i) > + init_list_head(&dupli_frags[i]); > + > + len_to_hash = clen; > +} > + > +static void z_erofs_fragments_dedupe_exit(void) > +{ > + struct erofs_fragment_dedupe_item *di, *n; > + struct list_head *head; > + unsigned int i; > + > + for (i = 0; i < FRAGMENT_HASHTABLE_SIZE; ++i) { > + head = &dupli_frags[i]; > + > + if (list_empty(head)) > + continue; > + > + list_for_each_entry_safe(di, n, head, list) > + free(di); > + } > +} > + > +void z_erofs_fragments_commit(struct erofs_inode *inode) > +{ > + if (!inode->fragment_size) > + return; > /* > * If the packed inode is larger than 4GiB, the full fragmentoff > * will be recorded by switching to the noncompact layout anyway. > @@ -28,13 +192,28 @@ int z_erofs_pack_fragments(struct erofs_inode *inode, > void *data, > if (inode->fragmentoff >> 32) > inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY; > > + inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; > + erofs_sb_set_fragments(); > +} > + > +int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, > + unsigned int len, u32 tofcrc) > +{ > + int ret; > + > + inode->fragmentoff = ftell(packedfile); > + inode->fragment_size = len; > + > if (fwrite(data, len, 1, packedfile) != 1) > return -EIO; > > - erofs_sb_set_fragments(); > - > erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size, > inode->fragmentoff); > + > + ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff, > + tofcrc); > + if (ret) > + return ret; > return len; > } > > @@ -50,6 +229,8 @@ void erofs_fragments_exit(void) > { > if (packedfile) > fclose(packedfile); > + > + z_erofs_fragments_dedupe_exit(); > } > > int erofs_fragments_init(void) > @@ -61,5 +242,7 @@ int erofs_fragments_init(void) > #endif > if (!packedfile) > return -ENOMEM; > + > + z_erofs_fragments_dedupe_init(16); > return 0; > }
