On 2025/1/15 15:09, Hongbo Li wrote:
erofs has added file-backed mount support. In this scenario, only buffered
I/O is allowed. So we enhance the I/O mode by implementing direct
I/O. This also lets the iov_iter (user buffer) interact with the
backing file's page cache directly.

Based on this, we might decrease the memory overhead with the following I/O stack:

erofs io (buffer io, direct io) --> fileio --> file-backed's page cache.

That means we can implement direct page cache pass-through in EROFS, while the underlying mounted file uses buffered I/O on the backing file system.

Thanks,
Hongbo

Signed-off-by: Hongbo Li <lihongb...@huawei.com>
---
  fs/erofs/data.c   |  11 +++--
  fs/erofs/fileio.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 0cd6b5c4df98..b5baff61be16 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -395,9 +395,14 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, 
struct iov_iter *to)
        if (IS_DAX(inode))
                return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
  #endif
-       if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
-               return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
-                                   NULL, 0, NULL, 0);
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               if (inode->i_sb->s_bdev)
+                       return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
+                                               NULL, 0, NULL, 0);
+               if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
+                       return generic_file_read_iter(iocb, to);
+       }
+
        return filemap_read(iocb, to, 0);
  }
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 33f8539dda4a..76ed16a8ee75 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -10,12 +10,17 @@ struct erofs_fileio_rq {
        struct bio bio;
        struct kiocb iocb;
        struct super_block *sb;
+       ssize_t ret;
+       void *private;
  };
struct erofs_fileio {
+       struct file *file;      /* NOTE(review): never set or read in the visible hunks - confirm it is needed */
        struct erofs_map_blocks map;    /* most recent logical->physical mapping, reused while it covers the position */
        struct erofs_map_dev dev;       /* device lookup for the request being built */
        struct erofs_fileio_rq *rq;     /* request being built; submitted by the scan loop or by its caller */
+       size_t total;   /* direct read: number of bytes the caller asked for */
+       size_t done;    /* direct read: bytes completed so far, compared against total at the end */
  };
static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
@@ -24,6 +29,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long 
ret)
                        container_of(iocb, struct erofs_fileio_rq, iocb);
        struct folio_iter fi;
+ rq->ret = ret;
        if (ret > 0) {
                if (ret != rq->bio.bi_iter.bi_size) {
                        bio_advance(&rq->bio, ret);
@@ -43,6 +49,17 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, 
long ret)
        kfree(rq);
  }
+/*
+ * bi_end_io hook for direct-read requests: credit the byte count stored
+ * in rq->ret to the erofs_fileio that owns the request (rq->private).
+ * Requests that failed or read nothing (rq->ret <= 0) are not counted.
+ *
+ * NOTE(review): nothing here releases the pages attached with
+ * bio_iov_iter_get_pages(), and ->done is updated without any locking;
+ * confirm both are fine if a completion can ever run asynchronously.
+ */
+static void erofs_fileio_end_io(struct bio *bio)
+{
+       struct erofs_fileio_rq *req;
+       struct erofs_fileio *owner;
+
+       req = container_of(bio, struct erofs_fileio_rq, bio);
+       if (req->ret <= 0)
+               return;
+
+       owner = req->private;
+       owner->done += req->ret;
+}
+
  static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
  {
        struct iov_iter iter;
@@ -189,7 +206,112 @@ static void erofs_fileio_readahead(struct 
readahead_control *rac)
        erofs_fileio_rq_submit(io.rq);
  }
+/*
+ * Fill @iter for the byte range [iocb->ki_pos, iocb->ki_pos + io->total),
+ * one mapped piece at a time:
+ *  - inline (EROFS_MAP_META) data is copied out of the metadata buffer;
+ *  - unmapped ranges are zero-filled;
+ *  - mapped ranges get a fileio request whose bio points at the
+ *    destination pages.
+ * Copied and zeroed bytes are added to io->done here; bytes read through a
+ * request are added by erofs_fileio_end_io().
+ *
+ * On return io->rq may still hold the last request built; it has not been
+ * submitted yet and the caller is expected to submit it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int erofs_fileio_scan_iter(struct erofs_fileio *io, struct kiocb *iocb,
+                                 struct iov_iter *iter)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct erofs_map_blocks *map = &io->map;
+       struct iov_iter dest_iter;
+       /* size_t like io->total, so the requested length is never narrowed */
+       size_t cur = 0, end = io->total, len;
+       loff_t pos = iocb->ki_pos;
+       u64 ofs;
+       int err = 0;
+
+       while (cur < end) {
+               /* submit the last fileio rq */
+               if (io->rq) {
+                       erofs_fileio_rq_submit(io->rq);
+                       io->rq = NULL;
+               }
+
+               if (!in_range(pos + cur, map->m_la, map->m_llen)) {
+                       map->m_la = pos + cur;
+                       map->m_llen = end - cur;
+                       err = erofs_map_blocks(inode, map);
+                       if (err)
+                               break;
+                       /*
+                        * A mapping that is empty or does not cover the
+                        * position would never let the loop advance.
+                        */
+                       if (!in_range(pos + cur, map->m_la, map->m_llen)) {
+                               err = -EIO;
+                               break;
+                       }
+               }
+
+               /*
+                * The mapping may begin before the current position (it is
+                * reused after a short request, and the mapper may move
+                * m_la), so work relative to the offset inside the extent.
+                */
+               ofs = pos + cur - map->m_la;
+               len = min_t(u64, map->m_llen - ofs, end - cur);
+               /* split the whole iter with (cur, len) */
+               dest_iter = *iter;
+               iov_iter_advance(&dest_iter, cur);
+               iov_iter_truncate(&dest_iter, len);
+               if (map->m_flags & EROFS_MAP_META) {
+                       struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+                       void *src;
+
+                       src = erofs_read_metabuf(&buf, inode->i_sb,
+                                                map->m_pa + ofs, EROFS_KMAP);
+                       if (IS_ERR(src)) {
+                               err = PTR_ERR(src);
+                               break;
+                       }
+                       if (copy_to_iter(src, len, &dest_iter) != len) {
+                               erofs_put_metabuf(&buf);
+                               err = -EIO;
+                               break;
+                       }
+                       erofs_put_metabuf(&buf);
+                       io->done += len;
+               } else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
+                       /* a short zero-fill must not be counted as done */
+                       if (iov_iter_zero(len, &dest_iter) != len) {
+                               err = -EIO;
+                               break;
+                       }
+                       io->done += len;
+               } else {
+                       io->dev = (struct erofs_map_dev) {
+                               .m_pa = map->m_pa + ofs,
+                               .m_deviceid = map->m_deviceid,
+                       };
+                       err = erofs_map_dev(inode->i_sb, &io->dev);
+                       if (err)
+                               break;
+                       io->rq = erofs_fileio_rq_alloc(&io->dev);
+                       io->rq->private = io;
+                       /*
+                        * NOTE(review): the shift drops the low bits of
+                        * m_pa; if the request position is derived from
+                        * bi_sector, a position that is not sector-aligned
+                        * would read from the wrong offset - confirm whether
+                        * alignment must be enforced.
+                        */
+                       io->rq->bio.bi_iter.bi_sector =
+                                       io->dev.m_pa >> SECTOR_SHIFT;
+                       io->rq->bio.bi_end_io = erofs_fileio_end_io;
+
+                       err = bio_iov_iter_get_pages(&io->rq->bio, &dest_iter);
+                       if (!err && !io->rq->bio.bi_iter.bi_size)
+                               err = -EIO;
+                       if (err) {
+                               /* nothing usable was attached: drop the rq */
+                               bio_uninit(&io->rq->bio);
+                               kfree(io->rq);
+                               io->rq = NULL;
+                               break;
+                       }
+                       /*
+                        * The bio may take fewer bytes than asked for; only
+                        * advance by what was attached so the remainder is
+                        * picked up by the next iteration.
+                        */
+                       len = io->rq->bio.bi_iter.bi_size;
+               }
+               cur += len;
+       }
+
+       return err;
+}
+
+/*
+ * ->direct_IO for file-backed mounts: read from iocb->ki_pos into @iter,
+ * clamped to the inode size.
+ *
+ * Returns the number of bytes read (0 at or beyond EOF), a negative errno
+ * from the scan, or -EIO when fewer bytes completed than were requested.
+ * @iter is advanced by the bytes read on success; its length is otherwise
+ * left untouched so the caller's own accounting stays valid.
+ *
+ * NOTE(review): @io lives on this stack frame and requests point back at
+ * it; this is only safe if every request has completed by the time
+ * erofs_fileio_rq_submit() returns - confirm.
+ */
+static ssize_t erofs_fileio_direct_io(struct kiocb *iocb,
+                                     struct iov_iter *iter)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       /* loff_t, not size_t: file sizes may not fit in size_t */
+       loff_t i_size = i_size_read(inode);
+       struct erofs_fileio io = {};
+       int err;
+
+       if (unlikely(iocb->ki_pos >= i_size))
+               return 0;
+
+       /* clamp the request to EOF without modifying the caller's iter */
+       io.total = min_t(u64, iov_iter_count(iter), i_size - iocb->ki_pos);
+       if (!io.total)
+               return 0;
+
+       err = erofs_fileio_scan_iter(&io, iocb, iter);
+       /*
+        * Submit whatever the scan left pending even when it failed, so the
+        * request is completed and freed instead of leaked.  io.rq may be
+        * NULL here, exactly as it could be before this change.
+        */
+       erofs_fileio_rq_submit(io.rq);
+       if (err)
+               return err;
+       if (io.total != io.done)
+               return -EIO;
+
+       iov_iter_advance(iter, io.done);
+       return io.done;
+}
+
  const struct address_space_operations erofs_fileio_aops = {
        .read_folio = erofs_fileio_read_folio,
        .readahead = erofs_fileio_readahead,
+       .direct_IO = erofs_fileio_direct_io,    /* direct reads; presumably reached via generic_file_read_iter() - confirm */
  };

Reply via email to