In EROFS file-backed mount mode, reading a single folio may trigger multiple backend file I/Os. The original read path is built around erofs_fileio_scan_folio(), which uses a custom iteration mechanism to walk the folio and then issues the I/Os itself. This requires complex machinery to synchronize the completion of multiple split file I/Os, and it couples the iteration step with the data-reading step. We can decouple these two steps by using the iomap mechanism, thereby simplifying the implementation of the read path.
Signed-off-by: Hongbo Li <[email protected]> --- Changes from v1: - add folio release/invalidate hook. --- fs/erofs/data.c | 11 +-- fs/erofs/fileio.c | 176 ++++++++++++++++++++++---------------------- fs/erofs/internal.h | 8 ++ 3 files changed, 101 insertions(+), 94 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index f79ee80627d9..d1931fd6eed7 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -267,12 +267,6 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } -struct erofs_iomap_iter_ctx { - struct page *page; - void *base; - struct inode *realinode; -}; - static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { @@ -313,6 +307,9 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, else iomap->bdev = mdev.m_bdev; iomap->addr = mdev.m_dif->fsoff + mdev.m_pa; + /* keep device context when mapping to device */ + if (ctx) + ctx->dif = mdev.m_dif; if (flags & IOMAP_DAX) iomap->addr += mdev.m_dif->dax_part_off; } @@ -357,7 +354,7 @@ static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, return written; } -static const struct iomap_ops erofs_iomap_ops = { +const struct iomap_ops erofs_iomap_ops = { .iomap_begin = erofs_iomap_begin, .iomap_end = erofs_iomap_end, }; diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index abe873f01297..bdf086a992a7 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -13,10 +13,9 @@ struct erofs_fileio_rq { refcount_t ref; }; -struct erofs_fileio { - struct erofs_map_blocks map; - struct erofs_map_dev dev; +struct erofs_fileio_ctx { struct erofs_fileio_rq *rq; + struct erofs_device_info *dif; }; static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) @@ -32,7 +31,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) if (!rq->bio.bi_end_io) { bio_for_each_folio_all(fi, &rq->bio) 
{ DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret < 0, false); + iomap_finish_folio_read(fi.folio, fi.offset, fi.length, + ret < 0 ? ret : 0); } } else if (ret < 0 && !rq->bio.bi_status) { rq->bio.bi_status = errno_to_blk_status(ret); @@ -88,111 +88,113 @@ void erofs_fileio_submit_bio(struct bio *bio) bio)); } -static int erofs_fileio_scan_folio(struct erofs_fileio *io, - struct inode *inode, struct folio *folio) +static int erofs_fileio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t len) { - struct erofs_map_blocks *map = &io->map; - unsigned int cur = 0, end = folio_size(folio), len, attached = 0; - loff_t pos = folio_pos(folio), ofs; - int err = 0; - - erofs_onlinefolio_init(folio); - while (cur < end) { - if (!in_range(pos + cur, map->m_la, map->m_llen)) { - map->m_la = pos + cur; - map->m_llen = end - cur; - err = erofs_map_blocks(inode, map); - if (err) - break; - } + struct erofs_iomap_iter_ctx *iter_ctx = iter->private; + struct erofs_device_info *dif = iter_ctx->dif; + struct inode *realinode = iter_ctx ? 
iter_ctx->realinode : iter->inode; + struct folio *folio = ctx->cur_folio; + struct erofs_fileio_ctx *fileio_ctx = ctx->read_ctx; + struct iomap *iomap = (struct iomap *)&iter->iomap; + size_t poff = offset_in_folio(folio, iter->pos); + loff_t pos = iter->pos; + int ret = 0; + + if (iomap->type == IOMAP_HOLE) { + folio_zero_range(folio, poff, len); + return 0; + } - ofs = folio_pos(folio) + cur - map->m_la; - len = min_t(loff_t, map->m_llen - ofs, end - cur); - if (map->m_flags & EROFS_MAP_META) { - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - void *src; - - src = erofs_read_metabuf(&buf, inode->i_sb, - map->m_pa + ofs, erofs_inode_in_metabox(inode)); - if (IS_ERR(src)) { - err = PTR_ERR(src); - break; - } - memcpy_to_folio(folio, cur, src, len); - erofs_put_metabuf(&buf); - } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { - folio_zero_segment(folio, cur, cur + len); - attached = 0; - } else { - if (io->rq && (map->m_pa + ofs != io->dev.m_pa || - map->m_deviceid != io->dev.m_deviceid)) { -io_retry: - erofs_fileio_rq_submit(io->rq); - io->rq = NULL; - } - - if (!io->rq) { - io->dev = (struct erofs_map_dev) { - .m_pa = io->map.m_pa + ofs, - .m_deviceid = io->map.m_deviceid, - }; - err = erofs_map_dev(inode->i_sb, &io->dev); - if (err) - break; - io->rq = erofs_fileio_rq_alloc(&io->dev); - io->rq->bio.bi_iter.bi_sector = - (io->dev.m_dif->fsoff + io->dev.m_pa) >> 9; - attached = 0; - } - if (!bio_add_folio(&io->rq->bio, folio, len, cur)) - goto io_retry; - if (!attached++) - erofs_onlinefolio_split(folio); - io->dev.m_pa += len; + while (len > 0) { + sector_t sector = iomap_sector(iomap, pos); + unsigned int off = offset_in_folio(folio, pos); + unsigned int n = min(len, folio_size(folio) - off); + struct erofs_map_dev mdev = {}; + + if (!n) + break; + if (!fileio_ctx->rq || + fileio_ctx->dif != dif || + bio_end_sector(&fileio_ctx->rq->bio) != sector) { + erofs_fileio_rq_submit(fileio_ctx->rq); + mdev = (struct erofs_map_dev) { + .m_dif = dif, + .m_sb = 
realinode->i_sb, + .m_pa = (sector << SECTOR_SHIFT) + off, + }; + fileio_ctx->dif = mdev.m_dif; + fileio_ctx->rq = erofs_fileio_rq_alloc(&mdev); + fileio_ctx->rq->bio.bi_iter.bi_sector = + (mdev.m_dif->fsoff + mdev.m_pa) >> SECTOR_SHIFT; + } + if (!bio_add_folio(&fileio_ctx->rq->bio, folio, n, off)) { + erofs_fileio_rq_submit(fileio_ctx->rq); + fileio_ctx->rq = NULL; + continue; } - cur += len; + pos += n; + len -= n; } - erofs_onlinefolio_end(folio, err, false); - return err; + return ret; +} + +static void erofs_fileio_submit_read(struct iomap_read_folio_ctx *ctx) +{ + struct erofs_fileio_ctx *fileio_ctx = ctx->read_ctx; + + erofs_fileio_rq_submit(fileio_ctx->rq); + fileio_ctx->rq = NULL; } +static const struct iomap_read_ops erofs_fileio_read_ops = { + .read_folio_range = erofs_fileio_read_folio_range, + .submit_read = erofs_fileio_submit_read, +}; + static int erofs_fileio_read_folio(struct file *file, struct folio *folio) { + struct erofs_fileio_ctx fileio_ctx = {}; + struct iomap_read_folio_ctx read_ctx = { + .ops = &erofs_fileio_read_ops, + .cur_folio = folio, + .read_ctx = &fileio_ctx, + }; bool need_iput; - struct inode *realinode = erofs_real_inode(folio_inode(folio), &need_iput); - struct erofs_fileio io = {}; - int err; + struct erofs_iomap_iter_ctx iter_ctx = { + .realinode = erofs_real_inode(folio_inode(folio), &need_iput), + }; - trace_erofs_read_folio(realinode, folio, true); - err = erofs_fileio_scan_folio(&io, realinode, folio); - erofs_fileio_rq_submit(io.rq); + trace_erofs_read_folio(iter_ctx.realinode, folio, true); + iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx); if (need_iput) - iput(realinode); - return err; + iput(iter_ctx.realinode); + return 0; } static void erofs_fileio_readahead(struct readahead_control *rac) { + struct erofs_fileio_ctx fileio_ctx = {}; + struct iomap_read_folio_ctx read_ctx = { + .ops = &erofs_fileio_read_ops, + .rac = rac, + .read_ctx = &fileio_ctx, + }; bool need_iput; - struct inode *realinode = 
erofs_real_inode(rac->mapping->host, &need_iput); - struct erofs_fileio io = {}; - struct folio *folio; - int err; + struct erofs_iomap_iter_ctx iter_ctx = { + .realinode = erofs_real_inode(rac->mapping->host, &need_iput), + }; - trace_erofs_readahead(realinode, readahead_index(rac), + trace_erofs_readahead(iter_ctx.realinode, readahead_index(rac), readahead_count(rac), true); - while ((folio = readahead_folio(rac))) { - err = erofs_fileio_scan_folio(&io, realinode, folio); - if (err && err != -EINTR) - erofs_err(realinode->i_sb, "readahead error at folio %lu @ nid %llu", - folio->index, EROFS_I(realinode)->nid); - } - erofs_fileio_rq_submit(io.rq); + iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx); if (need_iput) - iput(realinode); + iput(iter_ctx.realinode); } const struct address_space_operations erofs_fileio_aops = { .read_folio = erofs_fileio_read_folio, .readahead = erofs_fileio_readahead, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, }; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a4f0a42cf8c3..cda927225b9a 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -411,6 +411,13 @@ struct erofs_map_dev { unsigned int m_deviceid; }; +struct erofs_iomap_iter_ctx { + struct page *page; + void *base; + struct inode *realinode; + struct erofs_device_info *dif; +}; + extern const struct super_operations erofs_sops; extern const struct address_space_operations erofs_aops; @@ -427,6 +434,7 @@ extern const struct file_operations erofs_file_fops; extern const struct file_operations erofs_dir_fops; extern const struct file_operations erofs_ishare_fops; +extern const struct iomap_ops erofs_iomap_ops; extern const struct iomap_ops z_erofs_iomap_report_ops; /* flags for erofs_fscache_register_cookie() */ -- 2.22.0
