On Wed, 4 Jan 2023 14:44:35 +0800 Xiang Gao <[email protected]> wrote:
> On 2023/1/4 14:16, Yue Hu wrote: > > Hi Xiang, > > > > Thanks for the reviewing. I'm planning to send v2. > > > > On Wed, 4 Jan 2023 11:13:55 +0800 > > Xiang Gao <[email protected]> wrote: > > > >> On 2022/12/19 17:50, Yue Hu wrote: > >>> From: Yue Hu <[email protected]> > >>> > >>> Add compressed fragments support for dump feature. > >>> > >>> Signed-off-by: Yue Hu <[email protected]> > >>> --- > >>> dump/main.c | 78 ++++++++++++++++++++++++++++++++-------- > >>> include/erofs/internal.h | 1 + > >>> lib/zmap.c | 2 +- > >>> 3 files changed, 65 insertions(+), 16 deletions(-) > >>> > >>> diff --git a/dump/main.c b/dump/main.c > >>> index bc4f047..d387841 100644 > >>> --- a/dump/main.c > >>> +++ b/dump/main.c > >>> @@ -14,6 +14,8 @@ > >>> #include "erofs/inode.h" > >>> #include "erofs/io.h" > >>> #include "erofs/dir.h" > >>> +#include "erofs/compress.h" > >>> +#include "erofs/fragments.h" > >>> #include "../lib/liberofs_private.h" > >>> > >>> #ifdef HAVE_LIBUUID > >>> @@ -96,6 +98,7 @@ static struct erofsdump_feature feature_lists[] = { > >>> { false, EROFS_FEATURE_INCOMPAT_CHUNKED_FILE, "chunked_file" }, > >>> { false, EROFS_FEATURE_INCOMPAT_DEVICE_TABLE, "device_table" }, > >>> { false, EROFS_FEATURE_INCOMPAT_ZTAILPACKING, "ztailpacking" }, > >>> + { false, EROFS_FEATURE_INCOMPAT_FRAGMENTS, "fragments" }, > >>> }; > >>> > >>> static int erofsdump_readdir(struct erofs_dir_context *ctx); > >>> @@ -285,10 +288,12 @@ static int erofsdump_readdir(struct > >>> erofs_dir_context *ctx) > >>> } > >>> > >>> if (S_ISREG(vi.i_mode)) { > >>> - stats.files_total_origin_size += vi.i_size; > >>> - inc_file_extension_count(ctx->dname, ctx->de_namelen); > >>> + if (!erofs_is_packed_inode(&vi)) { > >>> + stats.files_total_origin_size += vi.i_size; > >>> + inc_file_extension_count(ctx->dname, ctx->de_namelen); > >>> + update_file_size_statistics(vi.i_size, true); > >>> + } > >>> stats.files_total_size += occupied_size; > >>> - update_file_size_statistics(vi.i_size, true); > >>> update_file_size_statistics(occupied_size, false); > >>> } > >>> > >>> @@ -320,6 +325,10 @@ static void erofsdump_show_fileinfo(bool show_extent) > >>> "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" > >>> PRIu64 "..%10" PRIu64 " | %7" PRIu64 "\n", > >>> "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " : %10" > >>> PRIu64 "..%10" PRIu64 " | %7" PRIu64 " # device %u\n" > >>> }; > >>> + const char *frag_ext_fmt[] = { > >>> + "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 "\n", > >>> + "%4d: %8" PRIu64 "..%8" PRIu64 " | %7" PRIu64 " # device %u\n" > >>> + }; > >> > >> Why do we need another fmt rather than just fill fragment extent with > > > > I also plan to remove this fmt in v2. > > > >> physical addr 0? > > > > Okay. Just leave latter physical length as empty? > > > >> > >>> int err, i; > >>> erofs_off_t size; > >>> u16 access_mode; > >>> @@ -348,16 +357,31 @@ static void erofsdump_show_fileinfo(bool > >>> show_extent) > >>> } > >>> } > >>> > >>> + if (erofs_inode_is_data_compressed(inode.datalayout)) { > >>> + err = z_erofs_fill_inode_lazy(&inode); > >>> + if (err) { > >>> + erofs_err("read inode map header failed @ nid %llu", > >>> + inode.nid | 0ULL); > >>> + return; > >>> + } > >>> + } > >> > >> Why do we need to call z_erofs_fill_inode_lazy here? > > > > Used to check if the file has fragment. If yes, i think 'Compression ratio' > > can not be showed > > exactly. > > > > So, I'd like to show fragment information related instead as below: > > > > NID: 715 Links: 1 Layout: 1 Fragment: entire file > > > > NID: 715 Links: 1 Layout: 3 Fragment: tail of file > > I think no need to distinguish these two stuffs considering extra > complexity. So, no need to check fragment or not? just keep the original look? > > > > >> > >>> + > >>> err = erofs_get_occupied_size(&inode, &size); > >>> if (err) { > >>> erofs_err("get file size failed @ nid %llu", inode.nid > >>> | 0ULL); > >>> return; > >>> } > >>> > >>> - err = erofs_get_pathname(inode.nid, path, sizeof(path)); > >>> - if (err < 0) { > >>> - erofs_err("file path not found @ nid %llu", inode.nid | 0ULL); > >>> - return; > >> > >> Can we just ignore pathname if it doesn't exist? > > > > Okay. > > > >> > >>> + if (erofs_is_packed_inode(&inode) { > + strncpy(path, > >>> EROFS_PACKED_INODE, sizeof(path) - 1); > >>> + path[sizeof(path) - 1] = '\0'; > >>> + } else { > >>> + err = erofs_get_pathname(inode.nid, path, sizeof(path)); > >>> + if (err < 0) { > >>> + erofs_err("file path not found @ nid %llu", > >>> + inode.nid | 0ULL); > >>> + return; > >>> + } > >>> } > >>> > >>> strftime(timebuf, sizeof(timebuf), > >>> @@ -372,9 +396,13 @@ static void erofsdump_show_fileinfo(bool show_extent) > >>> file_category_types[erofs_mode_to_ftype(inode.i_mode)]); > >>> fprintf(stdout, "NID: %" PRIu64 " ", inode.nid); > >>> fprintf(stdout, "Links: %u ", inode.i_nlink); > >>> - fprintf(stdout, "Layout: %d Compression ratio: %.2f%%\n", > >>> - inode.datalayout, > >>> - (double)(100 * size) / (double)(inode.i_size)); > >>> + if (inode.z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) > >>> + fprintf(stdout, "Layout: %d Fragment: %s\n", > >>> + inode.datalayout, size ? "partial" : "full"); > >>> + else > >>> + fprintf(stdout, "Layout: %d Compression ratio: %.2f%%\n", > >>> + inode.datalayout, > >>> + (double)(100 * size) / (double)(inode.i_size)); > >>> fprintf(stdout, "Inode size: %d ", inode.inode_isize); > >>> fprintf(stdout, "Extent size: %u ", inode.extent_isize); > >>> fprintf(stdout, "Xattr size: %u\n", inode.xattr_isize); > >>> @@ -404,7 +432,8 @@ static void erofsdump_show_fileinfo(bool show_extent) > >>> if (!dumpcfg.show_extent) > >>> return; > >>> > >>> - fprintf(stdout, "\n Ext: logical offset | length : physical > >>> offset | length\n"); > >>> + fprintf(stdout, "\n Ext: logical offset | length%s\n", > >>> + size ? " : physical offset | length" : ""); > >>> while (map.m_la < inode.i_size) { > >>> struct erofs_map_dev mdev; > >>> > >>> @@ -425,10 +454,17 @@ static void erofsdump_show_fileinfo(bool > >>> show_extent) > >>> return; > >>> } > >>> > >>> - fprintf(stdout, ext_fmt[!!mdev.m_deviceid], extent_count++, > >>> - map.m_la, map.m_la + map.m_llen, map.m_llen, > >>> - mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen, > >>> - mdev.m_deviceid); > >>> + if (map.m_flags & EROFS_MAP_FRAGMENT) > >>> + fprintf(stdout, frag_ext_fmt[!!mdev.m_deviceid], > >>> + extent_count++, > >>> + map.m_la, map.m_la + map.m_llen, map.m_llen, > >>> + mdev.m_deviceid); > >> > >> except for the last fragment extent, the other extents all have physical > >> addr and length... > > > > It's true. > > > > What bothers me is how to better show fragment extent based on current > > format (especially > > no physical address and offset for fragment extent). > > > > Now, i think the fragment extent should be showed like below (just use a > > '0' to represent it)? > > > > a) the whole file is a fragment: > > > > Ext: logical offset | length : physical offset | length > > 1: 0.. 1046 | 1046 : 0 > > > Can we use > Ext: logical offset | length : physical offset | length > 1: 0.. 1046 | 1046 : 0..0 | 0 > > > > > b) the tail of file is a fragment: > > > > Ext: logical offset | length : physical offset | length > > 0: 0.. 19672 | 19672 : 1314816.. 1323008 | 8192 > > ... > > 13: 165989.. 182474 | 16485 : 1421312.. 1429504 | 8192 > > 14: 182474.. 196435 | 13961 : 0 > > Here > 14: 182474.. 196435 | 13961 : 0.. 0 | 0 > > as well? No problem now. > > > > >> > >>> + else > >>> + fprintf(stdout, ext_fmt[!!mdev.m_deviceid], > >>> + extent_count++, > >>> + map.m_la, map.m_la + map.m_llen, map.m_llen, > >>> + mdev.m_pa, mdev.m_pa + map.m_plen, map.m_plen, > >>> + mdev.m_deviceid); > >>> map.m_la += map.m_llen; > >>> } > >>> fprintf(stdout, "%s: %d extents found\n", path, extent_count); > >>> @@ -537,6 +573,15 @@ static void erofsdump_print_statistic(void) > >>> erofs_err("read dir failed"); > >>> return; > >>> } > >>> + if (erofs_sb_has_fragments()) { > >>> + err = erofsdump_readdir(&(struct erofs_dir_context) { > >>> + .de_nid = sbi.packed_nid > >>> + }); > >> > >> why do we need this? > > > > Since some status need to be updated such as stats.files_total_size. > > why should packed file be recorded in files_total_size? Mainly related to compression ratio. files_total_size records on-disk (un)compressed size. 296 stats.files_total_size += occupied_size; 544 stats.compress_rate = (double)(100 * stats.files_total_size) / 545 (double)(stats.files_total_origin_size); The file system compression ratio will be incorrect (since all compressed fragments are missing). > > Thanks, > Gao Xiang > > > > >> > >> Thanks, > >> Gao Xiang
