Introduce a separate helper, z_erofs_fragments_tofh(), to compute the tail hash, in preparation for the upcoming multi-threaded fragment improvement.
Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- include/erofs/fragments.h | 3 +- lib/compress.c | 38 ++++++++++---------- lib/fragments.c | 75 ++++++++++++++++++--------------------- 3 files changed, 57 insertions(+), 59 deletions(-) diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h index a57b63c..75f1055 100644 --- a/include/erofs/fragments.h +++ b/include/erofs/fragments.h @@ -15,7 +15,8 @@ extern "C" extern const char *erofs_frags_packedname; #define EROFS_PACKED_INODE erofs_frags_packedname -int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc); +u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos); +int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh); int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc); int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, diff --git a/lib/compress.c b/lib/compress.c index d046112..a260dc4 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -46,7 +46,7 @@ struct z_erofs_compress_ictx { /* inode context */ int fd; u64 fpos; - u32 tof_chksum; + u32 tofh; bool fix_dedupedfrag; bool fragemitted; bool dedupe; @@ -626,7 +626,7 @@ nocompression: (!inode->fragment_size || ictx->fix_dedupedfrag)) { frag_packing: ret = z_erofs_pack_fragments(inode, ctx->queue + ctx->head, - len, ictx->tof_chksum); + len, ictx->tofh); if (ret < 0) return ret; e->plen = 0; /* indicate a fragment */ @@ -1103,7 +1103,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx, DBG_BUGON(offset != -1 && frag && inode->fragment_size); if (offset != -1 && frag && !inode->fragment_size && cfg.c_fragdedupe != FRAGDEDUPE_OFF) { - ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum); + ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh); if (ret < 0) return ret; if (inode->fragment_size > ctx->remaining) @@ -1622,21 +1622,23 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) 
inode->z_algorithmtype[0] = ictx->ccfg->algorithmtype; inode->z_algorithmtype[1] = 0; - /* - * Handle tails in advance to avoid writing duplicated - * parts into the packed inode. - */ - if (cfg.c_fragments && !erofs_is_packed_inode(inode) && - ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) { - ret = z_erofs_fragments_dedupe(inode, fd, &ictx->tof_chksum); - if (ret < 0) - goto err_free_ictx; + if (cfg.c_fragments && !erofs_is_packed_inode(inode)) { + ictx->tofh = z_erofs_fragments_tofh(inode, fd, fpos); + if (ictx == &g_ictx && cfg.c_fragdedupe != FRAGDEDUPE_OFF) { + /* + * Handle tails in advance to avoid writing duplicated + * parts into the packed inode. + */ + ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh); + if (ret < 0) + goto err_free_ictx; - if (cfg.c_fragdedupe == FRAGDEDUPE_INODE && - inode->fragment_size < inode->i_size) { - erofs_dbg("Discard the sub-inode tail fragment of %s", - inode->i_srcpath); - inode->fragment_size = 0; + if (cfg.c_fragdedupe == FRAGDEDUPE_INODE && + inode->fragment_size < inode->i_size) { + erofs_dbg("Discard the sub-inode tail fragment of %s", + inode->i_srcpath); + inode->fragment_size = 0; + } } } ictx->inode = inode; @@ -1647,7 +1649,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) ictx->dedupe = false; if (all_fragments && !inode->fragment_size) { - ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tof_chksum); + ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tofh); if (ret) goto err_free_idata; } diff --git a/lib/fragments.c b/lib/fragments.c index 9dfe0e3..9f5f1f9 100644 --- a/lib/fragments.c +++ b/lib/fragments.c @@ -3,9 +3,6 @@ * Copyright (C), 2022, Coolpad Group Limited. 
* Created by Yue Hu <huy...@coolpad.com> */ -#ifndef _LARGEFILE64_SOURCE -#define _LARGEFILE64_SOURCE -#endif #ifndef _FILE_OFFSET_BITS #define _FILE_OFFSET_BITS 64 #endif @@ -49,23 +46,39 @@ struct erofs_packed_inode { const char *erofs_frags_packedname = "packed_file"; -#ifndef HAVE_LSEEK64 -#define erofs_lseek64 lseek -#else -#define erofs_lseek64 lseek64 -#endif +u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos) +{ + u8 data_to_hash[EROFS_TOF_HASHLEN]; + u32 hash; + int ret; + + if (inode->i_size <= EROFS_TOF_HASHLEN) + return ~0U; + + ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN, + fpos + inode->i_size - EROFS_TOF_HASHLEN); + if (ret < 0) + return -errno; + if (ret != EROFS_TOF_HASHLEN) { + DBG_BUGON(1); + return -EIO; + } + hash = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN); + return hash != ~0U ? hash : 0; +} -static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd, - u32 crc) +int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh) { struct erofs_packed_inode *epi = inode->sbi->packedinode; struct erofs_fragment_dedupe_item *cur, *di = NULL; - struct list_head *head = &epi->hash[FRAGMENT_HASH(crc)]; + struct list_head *head = &epi->hash[FRAGMENT_HASH(tofh)]; unsigned int s1, e1; erofs_off_t deduped; u8 *data; int ret; + if (inode->i_size <= EROFS_TOF_HASHLEN) + return 0; if (list_empty(head)) return 0; @@ -138,27 +151,13 @@ static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd, return 0; } -int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc) -{ - u8 data_to_hash[EROFS_TOF_HASHLEN]; - int ret; - - if (inode->i_size <= EROFS_TOF_HASHLEN) - return 0; - - ret = pread(fd, data_to_hash, EROFS_TOF_HASHLEN, - inode->i_size - EROFS_TOF_HASHLEN); - if (ret != EROFS_TOF_HASHLEN) - return -errno; - - *tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN); - return z_erofs_fragments_dedupe_find(inode, fd, *tofcrc); -} - -static int 
z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data, - unsigned int len, erofs_off_t pos) +static int z_erofs_fragments_dedupe_insert(struct erofs_inode *inode, + void *data, u32 tofh) { + struct erofs_packed_inode *epi = inode->sbi->packedinode; struct erofs_fragment_dedupe_item *di; + erofs_off_t len = inode->fragment_size; + erofs_off_t pos = inode->fragmentoff; if (len <= EROFS_TOF_HASHLEN) return 0; @@ -172,14 +171,13 @@ static int z_erofs_fragments_dedupe_insert(struct list_head *hash, void *data, return -ENOMEM; memcpy(di->data, data, len); - di->length = len; di->pos = pos; - - list_add_tail(&di->list, hash); + di->length = len; + list_add_tail(&di->list, &epi->hash[FRAGMENT_HASH(tofh)]); return 0; } -int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc) +int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofh) { struct erofs_packed_inode *epi = inode->sbi->packedinode; s64 offset, rc; @@ -240,9 +238,7 @@ int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc) inode->i_srcpath); if (memblock) - rc = z_erofs_fragments_dedupe_insert( - &epi->hash[FRAGMENT_HASH(tofcrc)], memblock, - inode->fragment_size, inode->fragmentoff); + rc = z_erofs_fragments_dedupe_insert(inode, memblock, tofh); else rc = 0; out: @@ -256,7 +252,7 @@ out: } int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, - unsigned int len, u32 tofcrc) + unsigned int len, u32 tofh) { struct erofs_packed_inode *epi = inode->sbi->packedinode; s64 offset = lseek(epi->fd, 0, SEEK_CUR); @@ -279,8 +275,7 @@ int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL, inode->i_srcpath); - ret = z_erofs_fragments_dedupe_insert(&epi->hash[FRAGMENT_HASH(tofcrc)], - data, len, inode->fragmentoff); + ret = z_erofs_fragments_dedupe_insert(inode, data, tofh); if (ret) return ret; return len; -- 2.43.5