This patch adds support for large folio buffered writes for regular files,
with the exception of atomic files, inline files and compressed files.

We introduce a state bitmap to track the uptodate/dirty state of each
sub-page within a folio. Since f2fs_folio_state is now variably sized, we
allocate it with kzalloc for now. Better ideas are appreciated.

Signed-off-by: Nanzhe Zhao <[email protected]>
---
 fs/f2fs/data.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/f2fs/f2fs.h |   1 +
 fs/f2fs/file.c |   4 -
 3 files changed, 210 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a40db6894fc..3aaf20824205 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2414,14 +2414,26 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, 
struct bio **bio_ret,
 static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
 {
        struct f2fs_folio_state *ffs = folio->private;
+       unsigned int nr_subpages;
 
        if (ffs)
                return ffs;
 
-       ffs = f2fs_kmem_cache_alloc(ffs_entry_slab,
-                       GFP_NOIO | __GFP_ZERO, true, NULL);
+       /* Order-0 folios need no per-sub-page tracking. */
+       nr_subpages = folio_size(folio) >> PAGE_SHIFT;
+       if (nr_subpages <= 1)
+               return NULL;
+
+       /* Room for two bitmaps (uptodate + dirty) of nr_subpages bits each. */
+       ffs = kzalloc(struct_size(ffs, state, BITS_TO_LONGS(2 * nr_subpages)),
+                       GFP_NOIO);
+       if (!ffs)
+               return NULL;
 
        spin_lock_init(&ffs->state_lock);
+       /* Seed both bitmaps from the current folio-level flags. */
+       if (folio_test_uptodate(folio))
+               bitmap_set(ffs->state, 0, nr_subpages);
+       if (folio_test_dirty(folio))
+               bitmap_set(ffs->state, nr_subpages, nr_subpages);
+
        folio_attach_private(folio, ffs);
        return ffs;
 }
@@ -2440,7 +2452,177 @@ static void ffs_detach_free(struct folio *folio)
                return;
 
        WARN_ON_ONCE(ffs->read_pages_pending != 0);
-       kmem_cache_free(ffs_entry_slab, ffs);
+       kfree(ffs);
+}
+
+/* Number of PAGE_SIZE sub-pages backing @folio. */
+static inline unsigned int ffs_nr_subpages(const struct folio *folio)
+{
+       return folio_size(folio) >> PAGE_SHIFT;
+}
+
+/*
+ * Test whether the sub-page of @folio containing byte @offset is uptodate.
+ * Returns false when no per-sub-page state is attached or when @offset lies
+ * beyond the folio.
+ */
+static inline bool ffs_subpage_is_uptodate(struct f2fs_folio_state *ffs,
+                                        const struct folio *folio, size_t offset)
+{
+       unsigned int idx = offset >> PAGE_SHIFT;
+
+       if (!ffs)
+               return false;
+
+       if (idx >= ffs_nr_subpages(folio))
+               return false;
+
+       /* Uptodate bits occupy the first nr_subpages bits of ->state. */
+       return test_bit(idx, ffs->state);
+}
+
+/*
+ * Mark the sub-pages covering [@offset, @offset + @len) uptodate, and
+ * promote the whole folio to uptodate once every sub-page is.
+ *
+ * Must be called while holding folio lock.
+ */
+static inline void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len)
+{
+       struct f2fs_folio_state *ffs = folio->private;
+       unsigned int nr_subpages = ffs_nr_subpages(folio);
+       unsigned int start, end;
+
+       /*
+        * No per-sub-page state attached: fall back to folio granularity.
+        * NOTE(review): for a large folio without ffs this marks sub-pages
+        * uptodate that may never have been read — confirm callers only hit
+        * this path when the whole folio is being overwritten.
+        */
+       if (!ffs) {
+               folio_mark_uptodate(folio);
+               return;
+       }
+
+       /* Round the byte range out to whole sub-pages, clamped to the folio. */
+       start = offset >> PAGE_SHIFT;
+       end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       end = min(end, nr_subpages);
+
+       bitmap_set(ffs->state, start, end - start);
+       if (bitmap_full(ffs->state, nr_subpages))
+               folio_mark_uptodate(folio);
+}
+
+/*
+ * Find the next sub-page of @folio that must be read from disk before the
+ * buffered write [@orig_off, @orig_off + @len) can proceed.
+ *
+ * Only a partially-overwritten head or tail sub-page ever needs reading;
+ * fully-overwritten sub-pages in the middle will be rewritten anyway.
+ * On success, *@need_off is set to the sub-page-aligned offset that needs
+ * a read and true is returned; false means nothing (more) needs reading.
+ */
+static bool f2fs_find_next_need_read_block(struct f2fs_folio_state *ffs,
+                                         const struct folio *folio,
+                                         size_t orig_off, size_t *need_off,
+                                         size_t len)
+{
+       size_t start = orig_off;
+       size_t end = start + len;
+       size_t head, tail;
+
+       /* Write starts mid-page: the head sub-page must be read first. */
+       if (start & (PAGE_SIZE - 1)) {
+               head = round_down(start, PAGE_SIZE);
+               if (!ffs_subpage_is_uptodate(ffs, folio, head)) {
+                       *need_off = head;
+                       return true;
+               }
+       }
+
+       /* Write ends mid-page: the tail sub-page must be read too. */
+       if (end & (PAGE_SIZE - 1)) {
+               tail = round_down(end - 1, PAGE_SIZE);
+               if (!ffs_subpage_is_uptodate(ffs, folio, tail)) {
+                       *need_off = tail;
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * prepare_large_folio_write_begin - make the sub-pages touched by a partial
+ * write uptodate before the copy in generic_perform_write() runs.
+ *
+ * Only the (at most two) partially-overwritten sub-pages at the head and
+ * tail of the write range ever need reading; fully-overwritten sub-pages
+ * are rewritten anyway.  @mapping is currently unused.
+ *
+ * Called with @folio locked.  Returns 0 on success, negative errno on
+ * failure.
+ */
+static int prepare_large_folio_write_begin(struct inode *inode,
+                                         struct address_space *mapping,
+                                         struct folio *folio, loff_t pos,
+                                         unsigned int len)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_folio_state *ffs;
+       struct dnode_of_data dn;
+       size_t ori_off = offset_in_folio(folio, pos);
+       size_t need_off = ori_off;
+       pgoff_t index;
+       bool get_dn;
+       int err = 0;
+       sector_t sector;
+       struct block_device *bdev;
+       struct bio *bio;
+
+       len = min_t(unsigned int, len, folio_size(folio) - ori_off);
+       if (folio_test_uptodate(folio) || len == folio_size(folio))
+               return 0;
+
+       /*
+        * The caller only gets here for large folios, so a NULL return means
+        * the allocation failed; returning 0 would silently skip the
+        * read-before-write and corrupt data on a partial overwrite.
+        */
+       ffs = ffs_find_or_alloc(folio);
+       if (!ffs)
+               return -ENOMEM;
+
+       /* Inline data must have been converted before reaching here. */
+       if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+               return -EINVAL;
+
+       while (f2fs_find_next_need_read_block(ffs, folio, ori_off, &need_off,
+                                             len)) {
+               size_t off;
+
+               /*
+                * Resolve the block backing the sub-page at @need_off (not
+                * @ori_off: using the original offset would re-read the head
+                * sub-page forever once the tail is the one that needs I/O).
+                */
+               index = folio->index + (need_off >> PAGE_SHIFT);
+               get_dn = false;
+
+               if (!f2fs_lookup_read_extent_cache_block(inode, index,
+                                                &dn.data_blkaddr)) {
+                       if (IS_DEVICE_ALIASING(inode))
+                               return -ENODATA;
+
+                       set_new_dnode(&dn, inode, NULL, NULL, 0);
+                       err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+                       if (err)
+                               goto out;
+                       get_dn = true;
+               }
+
+               if (dn.data_blkaddr == NULL_ADDR) {
+                       err = -EFSCORRUPTED;
+                       goto out;
+               }
+
+               /* @need_off is already sub-page aligned. */
+               off = need_off;
+
+               if (dn.data_blkaddr == NEW_ADDR) {
+                       /* Preallocated but never written: zero, don't read. */
+                       folio_zero_segment(folio, off, off + PAGE_SIZE);
+                       ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+                       /* goto (not continue) so a held dnode is released */
+                       goto out;
+               }
+
+               if (!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+                               DATA_GENERIC_ENHANCE_READ)) {
+                       err = -EFSCORRUPTED;
+                       goto out;
+               }
+
+               /* Submit a synchronous read for this sub-page. */
+               f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+               bdev = f2fs_target_device(sbi, dn.data_blkaddr, &sector);
+
+               bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC,
+                                      GFP_NOIO, &f2fs_bioset);
+               bio->bi_iter.bi_sector = sector;
+               /*
+                * NOTE(review): submit_bio_wait() bypasses f2fs's normal read
+                * completion path; confirm non-inline fscrypt decryption and
+                * fsverity verification are not needed on this path.
+                */
+               f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS);
+
+               if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) {
+                       bio_put(bio);
+                       err = -EIO;
+                       goto out;
+               }
+
+               err = submit_bio_wait(bio);
+               bio_put(bio);
+               if (err)
+                       goto out;
+
+               ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+
+out:
+               if (get_dn)
+                       f2fs_put_dnode(&dn);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 static int f2fs_read_data_large_folio(struct inode *inode,
@@ -2457,7 +2639,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
        int ret = 0;
        bool folio_in_bio;
 
-       if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
+       if (f2fs_compressed_file(inode)) {
                if (folio)
                        folio_unlock(folio);
                return -EOPNOTSUPP;
@@ -3828,6 +4010,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        bool need_balance = false;
        bool use_cow = false;
        block_t blkaddr = NULL_ADDR;
+       fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT;
        int err = 0;
 
        trace_f2fs_write_begin(inode, pos, len);
@@ -3875,9 +4058,8 @@ static int f2fs_write_begin(const struct kiocb *iocb,
         * Do not use FGP_STABLE to avoid deadlock.
         * Will wait that below with our IO control.
         */
-       folio = f2fs_filemap_get_folio(mapping, index,
-                               FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
-                               mapping_gfp_mask(mapping));
+       fgp |= fgf_set_order(len);
+       folio = f2fs_filemap_get_folio(mapping, index, fgp, 
mapping_gfp_mask(mapping));
        if (IS_ERR(folio)) {
                err = PTR_ERR(folio);
                goto fail;
@@ -3890,7 +4072,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        if (f2fs_is_atomic_file(inode))
                err = prepare_atomic_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance, &use_cow);
-       else
+       else if (!folio_test_large(folio))
                err = prepare_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance);
        if (err)
@@ -3911,6 +4093,13 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 
        f2fs_folio_wait_writeback(folio, DATA, false, true);
 
+       if (folio_test_large(folio)) {
+               err = prepare_large_folio_write_begin(inode, mapping, folio, 
pos, len);
+               if (!err)
+                       return 0;
+               goto put_folio;
+       }
+
        if (len == folio_size(folio) || folio_test_uptodate(folio))
                return 0;
 
@@ -3963,15 +4152,22 @@ static int f2fs_write_end(const struct kiocb *iocb,
        trace_f2fs_write_end(inode, pos, len, copied);
 
        /*
-        * This should be come from len == PAGE_SIZE, and we expect copied
-        * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
-        * let generic_perform_write() try to copy data again through copied=0.
+        * If a short copy happens on a folio that isn't uptodate, we treat it
+        * with zero copied and let generic_perform_write() try to copy data 
again
+        * through copied=0.
         */
        if (!folio_test_uptodate(folio)) {
-               if (unlikely(copied != len))
+               if (unlikely(copied != len)) {
                        copied = 0;
-               else
+               } else if (folio_test_large(folio)) {
+                       ffs_mark_subrange_uptodate(folio,
+                                                  offset_in_folio(folio, pos), 
len);
+               } else {
+                       /*
+                        * For order-0 folios, this should be come from len == 
PAGE_SIZE
+                        */
                        folio_mark_uptodate(folio);
+               }
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e5b8f5374666..04a6310145c4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2045,6 +2045,7 @@ struct f2fs_sb_info {
 struct f2fs_folio_state {
        spinlock_t              state_lock;
        unsigned int            read_pages_pending;
+       /*
+        * Per-sub-page state: two consecutive bitmaps of nr_subpages bits
+        * each — [0, nr) tracks uptodate, [nr, 2*nr) tracks dirty.
+        */
+       unsigned long           state[];
 };
 
 /* Definitions to access f2fs_sb_info */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 18a9feccb1f9..9479f4d447c9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -624,10 +624,6 @@ static int f2fs_file_open(struct inode *inode, struct file 
*filp)
        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;
 
-       if (mapping_large_folio_support(inode->i_mapping) &&
-           filp->f_mode & FMODE_WRITE)
-               return -EOPNOTSUPP;
-
        err = fsverity_file_open(inode, filp);
        if (err)
                return err;
-- 
2.34.1



_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to