This patch adds support for large folio buffered writes for regular files,
with the exception of atomic files, inline files and compressed files.

We introduce a state bitmap to track the uptodate/dirty state of each
sub-page within a folio. Since f2fs_folio_state is now variably sized, we
allocate it with kzalloc for now. Better ideas are appreciated.

Signed-off-by: Nanzhe Zhao <[email protected]>
---
 fs/f2fs/data.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/f2fs/f2fs.h |   1 +
 fs/f2fs/file.c |   4 -
 3 files changed, 210 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a40db6894fc..3aaf20824205 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2414,14 +2414,26 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
struct bio **bio_ret,
 static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
 {
        struct f2fs_folio_state *ffs = folio->private;
+       unsigned int nr_subpages;
 
        if (ffs)
                return ffs;
 
-       ffs = f2fs_kmem_cache_alloc(ffs_entry_slab,
-                       GFP_NOIO | __GFP_ZERO, true, NULL);
+       /* Order-0 folios need no per-sub-page tracking. */
+       nr_subpages = folio_size(folio) >> PAGE_SHIFT;
+       if (nr_subpages <= 1)
+               return NULL;
+
+       /* Room for two bitmaps (uptodate + dirty) of nr_subpages bits each. */
+       ffs = kzalloc(struct_size(ffs, state, BITS_TO_LONGS(2 * nr_subpages)),
+                       GFP_NOIO);
+       if (!ffs)
+               return NULL;
 
        spin_lock_init(&ffs->state_lock);
+       /* Seed both bitmaps from the current folio-level flags. */
+       if (folio_test_uptodate(folio))
+               bitmap_set(ffs->state, 0, nr_subpages);
+       if (folio_test_dirty(folio))
+               bitmap_set(ffs->state, nr_subpages, nr_subpages);
+
        folio_attach_private(folio, ffs);
        return ffs;
 }
@@ -2440,7 +2452,177 @@ static void ffs_detach_free(struct folio *folio)
                return;
 
        WARN_ON_ONCE(ffs->read_pages_pending != 0);
-       kmem_cache_free(ffs_entry_slab, ffs);
+       kfree(ffs);
+}
+
+/* Number of PAGE_SIZE sub-pages backing @folio. */
+static inline unsigned int ffs_nr_subpages(const struct folio *folio)
+{
+       return folio_size(folio) >> PAGE_SHIFT;
+}
+
+/*
+ * Test whether the sub-page of @folio containing byte @offset is uptodate.
+ * Returns false when no per-sub-page state is attached or when @offset lies
+ * beyond the folio.
+ */
+static inline bool ffs_subpage_is_uptodate(struct f2fs_folio_state *ffs,
+                                        const struct folio *folio, size_t offset)
+{
+       unsigned int idx = offset >> PAGE_SHIFT;
+
+       if (!ffs)
+               return false;
+
+       if (idx >= ffs_nr_subpages(folio))
+               return false;
+
+       /* Uptodate bits occupy the first nr_subpages bits of ->state. */
+       return test_bit(idx, ffs->state);
+}
+
+/*
+ * Mark the sub-pages covering [@offset, @offset + @len) uptodate, and
+ * promote the whole folio to uptodate once every sub-page is.
+ *
+ * Must be called while holding folio lock.
+ */
+static inline void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len)
+{
+       struct f2fs_folio_state *ffs = folio->private;
+       unsigned int nr_subpages = ffs_nr_subpages(folio);
+       unsigned int start, end;
+
+       /*
+        * No per-sub-page state attached: fall back to folio granularity.
+        * NOTE(review): for a large folio without ffs this marks sub-pages
+        * uptodate that may never have been read — confirm callers only hit
+        * this path when the whole folio is being overwritten.
+        */
+       if (!ffs) {
+               folio_mark_uptodate(folio);
+               return;
+       }
+
+       /* Round the byte range out to whole sub-pages, clamped to the folio. */
+       start = offset >> PAGE_SHIFT;
+       end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       end = min(end, nr_subpages);
+
+       bitmap_set(ffs->state, start, end - start);
+       if (bitmap_full(ffs->state, nr_subpages))
+               folio_mark_uptodate(folio);
+}
+
+/*
+ * Find the next sub-page of @folio that must be read from disk before the
+ * buffered write [@orig_off, @orig_off + @len) can proceed.
+ *
+ * Only a partially-overwritten head or tail sub-page ever needs reading;
+ * fully-overwritten sub-pages in the middle will be rewritten anyway.
+ * On success, *@need_off is set to the sub-page-aligned offset that needs
+ * a read and true is returned; false means nothing (more) needs reading.
+ */
+static bool f2fs_find_next_need_read_block(struct f2fs_folio_state *ffs,
+                                         const struct folio *folio,
+                                         size_t orig_off, size_t *need_off,
+                                         size_t len)
+{
+       size_t start = orig_off;
+       size_t end = start + len;
+       size_t head, tail;
+
+       /* Write starts mid-page: the head sub-page must be read first. */
+       if (start & (PAGE_SIZE - 1)) {
+               head = round_down(start, PAGE_SIZE);
+               if (!ffs_subpage_is_uptodate(ffs, folio, head)) {
+                       *need_off = head;
+                       return true;
+               }
+       }
+
+       /* Write ends mid-page: the tail sub-page must be read too. */
+       if (end & (PAGE_SIZE - 1)) {
+               tail = round_down(end - 1, PAGE_SIZE);
+               if (!ffs_subpage_is_uptodate(ffs, folio, tail)) {
+                       *need_off = tail;
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * prepare_large_folio_write_begin - make the sub-pages touched by a partial
+ * write uptodate before the copy in generic_perform_write() runs.
+ *
+ * Only the (at most two) partially-overwritten sub-pages at the head and
+ * tail of the write range ever need reading; fully-overwritten sub-pages
+ * are rewritten anyway.  @mapping is currently unused.
+ *
+ * Called with @folio locked.  Returns 0 on success, negative errno on
+ * failure.
+ */
+static int prepare_large_folio_write_begin(struct inode *inode,
+                                         struct address_space *mapping,
+                                         struct folio *folio, loff_t pos,
+                                         unsigned int len)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_folio_state *ffs;
+       struct dnode_of_data dn;
+       size_t ori_off = offset_in_folio(folio, pos);
+       size_t need_off = ori_off;
+       pgoff_t index;
+       bool get_dn;
+       int err = 0;
+       sector_t sector;
+       struct block_device *bdev;
+       struct bio *bio;
+
+       len = min_t(unsigned int, len, folio_size(folio) - ori_off);
+       if (folio_test_uptodate(folio) || len == folio_size(folio))
+               return 0;
+
+       /*
+        * The caller only gets here for large folios, so a NULL return means
+        * the allocation failed; returning 0 would silently skip the
+        * read-before-write and corrupt data on a partial overwrite.
+        */
+       ffs = ffs_find_or_alloc(folio);
+       if (!ffs)
+               return -ENOMEM;
+
+       /* Inline data must have been converted before reaching here. */
+       if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+               return -EINVAL;
+
+       while (f2fs_find_next_need_read_block(ffs, folio, ori_off, &need_off,
+                                             len)) {
+               size_t off;
+
+               /*
+                * Resolve the block backing the sub-page at @need_off (not
+                * @ori_off: using the original offset would re-read the head
+                * sub-page forever once the tail is the one that needs I/O).
+                */
+               index = folio->index + (need_off >> PAGE_SHIFT);
+               get_dn = false;
+
+               if (!f2fs_lookup_read_extent_cache_block(inode, index,
+                                                &dn.data_blkaddr)) {
+                       if (IS_DEVICE_ALIASING(inode))
+                               return -ENODATA;
+
+                       set_new_dnode(&dn, inode, NULL, NULL, 0);
+                       err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+                       if (err)
+                               goto out;
+                       get_dn = true;
+               }
+
+               if (dn.data_blkaddr == NULL_ADDR) {
+                       err = -EFSCORRUPTED;
+                       goto out;
+               }
+
+               /* @need_off is already sub-page aligned. */
+               off = need_off;
+
+               if (dn.data_blkaddr == NEW_ADDR) {
+                       /* Preallocated but never written: zero, don't read. */
+                       folio_zero_segment(folio, off, off + PAGE_SIZE);
+                       ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+                       /* goto (not continue) so a held dnode is released */
+                       goto out;
+               }
+
+               if (!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+                               DATA_GENERIC_ENHANCE_READ)) {
+                       err = -EFSCORRUPTED;
+                       goto out;
+               }
+
+               /* Submit a synchronous read for this sub-page. */
+               f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+               bdev = f2fs_target_device(sbi, dn.data_blkaddr, &sector);
+
+               bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC,
+                                      GFP_NOIO, &f2fs_bioset);
+               bio->bi_iter.bi_sector = sector;
+               /*
+                * NOTE(review): submit_bio_wait() bypasses f2fs's normal read
+                * completion path; confirm non-inline fscrypt decryption and
+                * fsverity verification are not needed on this path.
+                */
+               f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS);
+
+               if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) {
+                       bio_put(bio);
+                       err = -EIO;
+                       goto out;
+               }
+
+               err = submit_bio_wait(bio);
+               bio_put(bio);
+               if (err)
+                       goto out;
+
+               ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+
+out:
+               if (get_dn)
+                       f2fs_put_dnode(&dn);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 static int f2fs_read_data_large_folio(struct inode *inode,
@@ -2457,7 +2639,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
        int ret = 0;
        bool folio_in_bio;
 
-       if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
+       if (f2fs_compressed_file(inode)) {
                if (folio)
                        folio_unlock(folio);
                return -EOPNOTSUPP;
@@ -3828,6 +4010,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        bool need_balance = false;
        bool use_cow = false;
        block_t blkaddr = NULL_ADDR;
+       fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT;
        int err = 0;
 
        trace_f2fs_write_begin(inode, pos, len);
@@ -3875,9 +4058,8 @@ static int f2fs_write_begin(const struct kiocb *iocb,
         * Do not use FGP_STABLE to avoid deadlock.
         * Will wait that below with our IO control.
         */
-       folio = f2fs_filemap_get_folio(mapping, index,
-                               FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
-                               mapping_gfp_mask(mapping));
+       fgp |= fgf_set_order(len);
+       folio = f2fs_filemap_get_folio(mapping, index, fgp, 
mapping_gfp_mask(mapping));
        if (IS_ERR(folio)) {
                err = PTR_ERR(folio);
                goto fail;
@@ -3890,7 +4072,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        if (f2fs_is_atomic_file(inode))
                err = prepare_atomic_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance, &use_cow);
-       else
+       else if (!folio_test_large(folio))
                err = prepare_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance);
        if (err)
@@ -3911,6 +4093,13 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 
        f2fs_folio_wait_writeback(folio, DATA, false, true);
 
+       if (folio_test_large(folio)) {
+               err = prepare_large_folio_write_begin(inode, mapping, folio, 
pos, len);
+               if (!err)
+                       return 0;
+               goto put_folio;
+       }
+
        if (len == folio_size(folio) || folio_test_uptodate(folio))
                return 0;
 
@@ -3963,15 +4152,22 @@ static int f2fs_write_end(const struct kiocb *iocb,
        trace_f2fs_write_end(inode, pos, len, copied);
 
        /*
-        * This should be come from len == PAGE_SIZE, and we expect copied
-        * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
-        * let generic_perform_write() try to copy data again through copied=0.
+        * If a short copy happens on a folio that isn't uptodate, we treat it
+        * with zero copied and let generic_perform_write() try to copy data 
again
+        * through copied=0.
         */
        if (!folio_test_uptodate(folio)) {
-               if (unlikely(copied != len))
+               if (unlikely(copied != len)) {
                        copied = 0;
-               else
+               } else if (folio_test_large(folio)) {
+                       ffs_mark_subrange_uptodate(folio,
+                                                  offset_in_folio(folio, pos), 
len);
+               } else {
+                       /*
+                        * For order-0 folios, this should be come from len == 
PAGE_SIZE
+                        */
                        folio_mark_uptodate(folio);
+               }
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e5b8f5374666..04a6310145c4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2045,6 +2045,7 @@ struct f2fs_sb_info {
 struct f2fs_folio_state {
        spinlock_t              state_lock;
        unsigned int            read_pages_pending;
+       /*
+        * Per-sub-page state: two consecutive bitmaps of nr_subpages bits
+        * each — [0, nr) tracks uptodate, [nr, 2*nr) tracks dirty.
+        */
+       unsigned long           state[];
 };
 
 /* Definitions to access f2fs_sb_info */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 18a9feccb1f9..9479f4d447c9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -624,10 +624,6 @@ static int f2fs_file_open(struct inode *inode, struct file 
*filp)
        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;
 
-       if (mapping_large_folio_support(inode->i_mapping) &&
-           filp->f_mode & FMODE_WRITE)
-               return -EOPNOTSUPP;
-
        err = fsverity_file_open(inode, filp);
        if (err)
                return err;
-- 
2.34.1



_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to