From: Nanzhe <[email protected]>

To avoid the complexity of unlocking a large folio in write_begin,
preallocate partial blocks for inodes that can use large folios.
During write_begin, read only the partial head and tail 4K subpages
that need read-before-write, and skip read I/O for the full middle
subpages covered by the write.

Signed-off-by: Nanzhe <[email protected]>
---
 fs/f2fs/data.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/f2fs/f2fs.h |   2 +
 fs/f2fs/file.c |  17 +++-
 3 files changed, 222 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 904cfaee139e..f5b4974e6b3c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2548,6 +2548,187 @@ static void ffs_detach_free(struct folio *folio)
        kfree(ffs);
 }
 
+/*
+ * Report whether the PAGE_SIZE block at page-cache index @index is uptodate.
+ *
+ * Folios that carry no per-block state answer with the folio-wide uptodate
+ * flag; otherwise the block's bit in ffs->state is consulted.
+ */
+bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index)
+{
+       struct f2fs_folio_state *ffs;
+       loff_t pos = (loff_t)index << PAGE_SHIFT;
+       unsigned int blk;
+
+       if (!folio_has_ffs(folio))
+               return folio_test_uptodate(folio);
+
+       /* Translate the file position into a block number inside the folio. */
+       blk = offset_in_folio(folio, pos) >> PAGE_SHIFT;
+       ffs = folio->private;
+
+       return test_bit(blk, ffs->state);
+}
+
+/*
+ * Mark every PAGE_SIZE block overlapping [@offset, @offset + @len) of @folio
+ * uptodate, and mark the folio itself uptodate once all of its blocks are.
+ *
+ * Folios that carry no per-block state are marked uptodate as a whole.
+ */
+void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len)
+{
+       struct f2fs_folio_state *ffs;
+       unsigned int nr_subpages, start, end;
+       unsigned long flags;
+       bool uptodate;
+
+       if (!folio_has_ffs(folio)) {
+               folio_mark_uptodate(folio);
+               return;
+       }
+
+       ffs = folio->private;
+       nr_subpages = folio_nr_pages(folio);
+       start = offset >> PAGE_SHIFT;
+       end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       end = min(end, nr_subpages);
+
+       /*
+        * The uptodate and dirty bits live in the same bitmap and bitmap_set()
+        * is not atomic, so take the same lock ffs_mark_subrange_dirty() uses
+        * to avoid losing an update to a shared word.
+        */
+       spin_lock_irqsave(&ffs->state_lock, flags);
+       /* Skip the update if the range starts past the folio (end <= start). */
+       if (start < end)
+               bitmap_set(ffs->state, start, end - start);
+       uptodate = bitmap_full(ffs->state, nr_subpages);
+       spin_unlock_irqrestore(&ffs->state_lock, flags);
+
+       if (uptodate)
+               folio_mark_uptodate(folio);
+}
+
+/*
+ * Record the PAGE_SIZE blocks overlapping [@offset, @offset + @len) as dirty
+ * in the folio's per-block state.  The dirty bits are stored in ffs->state
+ * right after the first folio_nr_pages() bits.
+ *
+ * Does nothing for folios that carry no per-block state.
+ */
+static void ffs_mark_subrange_dirty(struct folio *folio,
+                                   size_t offset, size_t len)
+{
+       unsigned int blks_per_folio, first, past_last;
+       struct f2fs_folio_state *ffs;
+       unsigned long irq_flags;
+
+       if (!folio_has_ffs(folio))
+               return;
+
+       ffs = folio->private;
+       blks_per_folio = folio_nr_pages(folio);
+
+       /* Round the byte range outwards to whole blocks, capped at the folio. */
+       first = offset >> PAGE_SHIFT;
+       past_last = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       past_last = min(past_last, blks_per_folio);
+
+       spin_lock_irqsave(&ffs->state_lock, irq_flags);
+       bitmap_set(ffs->state, blks_per_folio + first, past_last - first);
+       spin_unlock_irqrestore(&ffs->state_lock, irq_flags);
+}
+
+/*
+ * Find an edge block of the write range [@orig_off, @orig_off + @len) that is
+ * only partially covered and is not uptodate yet.
+ *
+ * The block holding an unaligned start is checked first, then the block
+ * holding an unaligned end.  On a hit the block's byte offset within the
+ * folio is stored in @need_off and true is returned; otherwise @need_off is
+ * left untouched and false is returned.
+ */
+static bool f2fs_find_next_need_read_block(const struct folio *folio,
+                                         size_t orig_off, size_t *need_off,
+                                         size_t len)
+{
+       const size_t blk_mask = PAGE_SIZE - 1;
+       size_t stop = orig_off + len;
+       size_t blk_off;
+
+       /* Partial head block. */
+       if (orig_off & blk_mask) {
+               blk_off = orig_off & ~blk_mask;
+               if (!ffs_test_blk_uptodate(folio,
+                               folio->index + (blk_off >> PAGE_SHIFT)))
+                       goto found;
+       }
+
+       /* Partial tail block: the block containing the last written byte. */
+       if (stop & blk_mask) {
+               blk_off = (stop - 1) & ~blk_mask;
+               if (!ffs_test_blk_uptodate(folio,
+                               folio->index + (blk_off >> PAGE_SHIFT)))
+                       goto found;
+       }
+
+       return false;
+
+found:
+       *need_off = blk_off;
+       return true;
+}
+
+/*
+ * Prepare a large folio for a buffered write of @len bytes at @pos.
+ *
+ * Only the partially overwritten head and tail PAGE_SIZE blocks are brought
+ * uptodate: each one is either zeroed (NEW_ADDR) or read synchronously from
+ * its block address.  Blocks fully covered by the write are not read.
+ *
+ * @mapping is currently unused.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int prepare_large_folio_write_begin(struct inode *inode,
+                                         struct address_space *mapping,
+                                         struct folio *folio, loff_t pos,
+                                         unsigned int len)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       size_t ori_off = offset_in_folio(folio, pos);
+       size_t need_off = ori_off;
+
+       /* Nothing to read if the folio is uptodate or fully overwritten. */
+       len = min_t(unsigned int, len, folio_size(folio) - ori_off);
+       if (folio_test_uptodate(folio) || len == folio_size(folio))
+               return 0;
+
+       /* NOTE(review): the result is not checked here - confirm it cannot fail. */
+       ffs_find_or_alloc(folio);
+
+       /* Inline data must have been converted before reaching here. */
+       if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+               return -EINVAL;
+
+       while (f2fs_find_next_need_read_block(folio, ori_off, &need_off, len)) {
+               pgoff_t index = folio->index + (need_off >> PAGE_SHIFT);
+               struct block_device *bdev;
+               struct dnode_of_data dn;
+               struct bio *bio;
+               block_t blkaddr;
+               sector_t sector;
+               size_t off;
+               int err;
+
+               if (!f2fs_lookup_read_extent_cache_block(inode, index,
+                                                        &blkaddr)) {
+                       if (IS_DEVICE_ALIASING(inode))
+                               return -ENODATA;
+
+                       set_new_dnode(&dn, inode, NULL, NULL, 0);
+                       err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+                       if (err)
+                               return err;
+
+                       /*
+                        * Only the block address is needed below, so drop the
+                        * dnode right away.  This keeps a second iteration
+                        * (head then tail) from leaking the first reference.
+                        */
+                       blkaddr = dn.data_blkaddr;
+                       f2fs_put_dnode(&dn);
+               }
+
+               if (blkaddr == NULL_ADDR)
+                       return -EFSCORRUPTED;
+
+               /* Widen before shifting so the byte position cannot wrap. */
+               off = offset_in_folio(folio, (loff_t)index << PAGE_SHIFT);
+
+               /* Reserved but never written: the block reads back as zeroes. */
+               if (blkaddr == NEW_ADDR) {
+                       folio_zero_segment(folio, off, off + PAGE_SIZE);
+                       ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+                       continue;
+               }
+
+               if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+                                          DATA_GENERIC_ENHANCE_READ))
+                       return -EFSCORRUPTED;
+
+               /* Submit a synchronous read for this 4K subpage. */
+               f2fs_wait_on_block_writeback(inode, blkaddr);
+               bdev = f2fs_target_device(sbi, blkaddr, &sector);
+
+               bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC,
+                                      GFP_NOIO, &f2fs_bioset);
+               bio->bi_iter.bi_sector = sector;
+               f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS);
+
+               if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) {
+                       bio_put(bio);
+                       return -EIO;
+               }
+
+               err = submit_bio_wait(bio);
+               bio_put(bio);
+               if (err)
+                       return err;
+
+               ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+       }
+
+       return 0;
+}
+
 static int f2fs_read_data_large_folio(struct inode *inode,
                struct fsverity_info *vi,
                struct readahead_control *rac, struct folio *folio)
@@ -3940,6 +4121,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        bool need_balance = false;
        bool use_cow = false;
        block_t blkaddr = NULL_ADDR;
+       fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS;
        int err = 0;
 
        trace_f2fs_write_begin(inode, pos, len);
@@ -3987,9 +4169,9 @@ static int f2fs_write_begin(const struct kiocb *iocb,
         * Do not use FGP_STABLE to avoid deadlock.
         * Will wait that below with our IO control.
         */
-       folio = f2fs_filemap_get_folio(mapping, index,
-                               FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
-                               mapping_gfp_mask(mapping));
+       fgp |= fgf_set_order(len);
+       folio = __filemap_get_folio(mapping, index, fgp,
+                                   mapping_gfp_mask(mapping));
        if (IS_ERR(folio)) {
                err = PTR_ERR(folio);
                goto fail;
@@ -4002,7 +4184,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
        if (f2fs_is_atomic_file(inode))
                err = prepare_atomic_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance, &use_cow);
-       else
+       else if (!folio_test_large(folio))
                err = prepare_write_begin(sbi, folio, pos, len,
                                        &blkaddr, &need_balance);
        if (err)
@@ -4023,6 +4205,14 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 
        f2fs_folio_wait_writeback(folio, DATA, false, true);
 
+       if (folio_test_large(folio)) {
+               err = prepare_large_folio_write_begin(inode, mapping, folio,
+                                       pos, len);
+               if (!err)
+                       return 0;
+               goto put_folio;
+       }
+
        if (len == folio_size(folio) || folio_test_uptodate(folio))
                return 0;
 
@@ -4076,15 +4266,19 @@ static int f2fs_write_end(const struct kiocb *iocb,
        trace_f2fs_write_end(inode, pos, len, copied);
 
        /*
-        * This should be come from len == PAGE_SIZE, and we expect copied
-        * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
-        * let generic_perform_write() try to copy data again through copied=0.
+        * If a short copy happens on a folio that isn't uptodate, we treat
+        * it with zero copied and let generic_perform_write() try to copy
+        * data again through copied=0.
         */
        if (!folio_test_uptodate(folio)) {
-               if (unlikely(copied != len))
+               if (unlikely(copied != len)) {
                        copied = 0;
-               else
+               } else if (folio_test_large(folio)) {
+                       ffs_mark_subrange_uptodate(folio,
+                                       offset_in_folio(folio, pos), len);
+               } else {
                        folio_mark_uptodate(folio);
+               }
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -4103,6 +4297,9 @@ static int f2fs_write_end(const struct kiocb *iocb,
        if (!copied)
                goto unlock_out;
 
+       if (folio_test_large(folio))
+               ffs_mark_subrange_dirty(folio, offset_in_folio(folio, pos),
+                                       copied);
        folio_mark_dirty(folio);
 
        if (f2fs_is_atomic_file(inode))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index dd262eb41777..ac71d0d22a81 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4253,6 +4253,8 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
                                struct writeback_control *wbc,
                                enum iostat_type io_type,
                                int compr_blocks, bool allow_balance);
+bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index);
+void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t 
len);
 void f2fs_write_failed(struct inode *inode, loff_t to);
 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
 bool f2fs_release_folio(struct folio *folio, gfp_t wait);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 633e9ade654f..eb8e237f3dad 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -5048,9 +5048,20 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
                        return ret;
        }
 
-       /* Do not preallocate blocks that will be written partially in 4KB. */
-       map.m_lblk = F2FS_BLK_ALIGN(pos);
-       map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+       if (mapping_large_folio_support(inode->i_mapping)) {
+               /*
+                * Preallocate all blocks touched by a large-folio buffered write so
+                * the regular write_begin path does not need to unlock the folio for
+                * f2fs_balance_fs().  Rechecking large-folio state after unlock is
+                * unreliable since partial truncation can split the folio.
+                */
+               map.m_lblk = F2FS_BYTES_TO_BLK(pos);
+               map.m_len = F2FS_BLK_ALIGN(pos + count);
+       } else {
+               /* Do not preallocate blocks that will be written partially in 4KB. */
+               map.m_lblk = F2FS_BLK_ALIGN(pos);
+               map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+       }
        if (map.m_len > map.m_lblk)
                map.m_len -= map.m_lblk;
        else
-- 
2.34.1



_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to