From: Nanzhe <[email protected]> To avoid the complexity of unlocking a large folio in write_begin, preallocate every block touched by a buffered write, including the partially written head and tail blocks, in f2fs_preallocate_blocks() for inodes whose mapping supports large folios. During write_begin, read only the partial head and tail 4K subpages that need read-before-write, and skip read I/O for the full middle subpages covered by the write.
Signed-off-by: Nanzhe <[email protected]> --- fs/f2fs/data.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/f2fs/f2fs.h | 2 + fs/f2fs/file.c | 17 +++- 3 files changed, 222 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 904cfaee139e..f5b4974e6b3c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2548,6 +2548,187 @@ static void ffs_detach_free(struct folio *folio) kfree(ffs); } +bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index) +{ + struct f2fs_folio_state *ffs; + size_t offset; + unsigned int idx; + + if (!folio_has_ffs(folio)) + return folio_test_uptodate(folio); + + ffs = folio->private; + offset = offset_in_folio(folio, (loff_t)index << PAGE_SHIFT); + idx = offset >> PAGE_SHIFT; + return test_bit(idx, ffs->state); +} + +void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len) +{ + struct f2fs_folio_state *ffs; + unsigned int nr_subpages, start, end; + + if (!folio_has_ffs(folio)) { + folio_mark_uptodate(folio); + return; + } + + ffs = folio->private; + nr_subpages = folio_nr_pages(folio); + start = offset >> PAGE_SHIFT; + end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + end = min(end, nr_subpages); + + bitmap_set(ffs->state, start, end - start); + if (bitmap_full(ffs->state, nr_subpages)) + folio_mark_uptodate(folio); +} + +static void ffs_mark_subrange_dirty(struct folio *folio, + size_t offset, size_t len) +{ + struct f2fs_folio_state *ffs; + unsigned int nr_subpages, start, end; + unsigned long flags; + + if (!folio_has_ffs(folio)) + return; + + ffs = folio->private; + nr_subpages = folio_nr_pages(folio); + start = offset >> PAGE_SHIFT; + end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + end = min(end, nr_subpages); + + spin_lock_irqsave(&ffs->state_lock, flags); + bitmap_set(ffs->state, nr_subpages + start, end - start); + spin_unlock_irqrestore(&ffs->state_lock, flags); +} + +static bool f2fs_find_next_need_read_block(const struct folio *folio, + size_t 
orig_off, size_t *need_off, + size_t len) +{ + size_t start = orig_off; + size_t end = start + len; + size_t head, tail; + pgoff_t index; + + if (start & (PAGE_SIZE - 1)) { + head = round_down(start, PAGE_SIZE); + index = folio->index + (head >> PAGE_SHIFT); + if (!ffs_test_blk_uptodate(folio, index)) { + *need_off = head; + return true; + } + } + + if (end & (PAGE_SIZE - 1)) { + tail = round_down(end - 1, PAGE_SIZE); + index = folio->index + (tail >> PAGE_SHIFT); + if (!ffs_test_blk_uptodate(folio, index)) { + *need_off = tail; + return true; + } + } + + return false; +} + +static int prepare_large_folio_write_begin(struct inode *inode, + struct address_space *mapping, + struct folio *folio, loff_t pos, + unsigned int len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + size_t ori_off = offset_in_folio(folio, pos); + size_t need_off = ori_off; + pgoff_t index; + bool get_dn; + int err = 0; + sector_t sector; + struct block_device *bdev; + struct bio *bio; + + len = min_t(unsigned int, len, folio_size(folio) - ori_off); + if (folio_test_uptodate(folio) || len == folio_size(folio)) + return err; + + ffs_find_or_alloc(folio); + + /* Inline data must have been converted before reaching here. 
*/ + if (WARN_ON_ONCE(f2fs_has_inline_data(inode))) + return -EINVAL; + + while (f2fs_find_next_need_read_block(folio, ori_off, &need_off, len)) { + size_t off; + + index = folio->index + (need_off >> PAGE_SHIFT); + get_dn = false; + + if (!f2fs_lookup_read_extent_cache_block(inode, index, + &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) + return -ENODATA; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + get_dn = true; + if (err) + goto out; + } + + if (dn.data_blkaddr == NULL_ADDR) { + err = -EFSCORRUPTED; + goto out; + } + + off = offset_in_folio(folio, index << PAGE_SHIFT); + + if (dn.data_blkaddr == NEW_ADDR) { + folio_zero_segment(folio, off, off + PAGE_SIZE); + ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE); + continue; + } + + if (!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFSCORRUPTED; + goto out; + } + + /* Submit a synchronous read for this 4K subpage. */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + bdev = f2fs_target_device(sbi, dn.data_blkaddr, &sector); + + bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC, + GFP_NOIO, &f2fs_bioset); + bio->bi_iter.bi_sector = sector; + f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS); + + if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) { + bio_put(bio); + err = -EIO; + goto out; + } + + err = submit_bio_wait(bio); + bio_put(bio); + if (err) + goto out; + + ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE); + + } + +out: + if (get_dn) + f2fs_put_dnode(&dn); + + return err; +} + static int f2fs_read_data_large_folio(struct inode *inode, struct fsverity_info *vi, struct readahead_control *rac, struct folio *folio) @@ -3940,6 +4121,7 @@ static int f2fs_write_begin(const struct kiocb *iocb, bool need_balance = false; bool use_cow = false; block_t blkaddr = NULL_ADDR; + fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS; int err = 0; trace_f2fs_write_begin(inode, pos, len); @@ -3987,9 
+4169,9 @@ static int f2fs_write_begin(const struct kiocb *iocb, * Do not use FGP_STABLE to avoid deadlock. * Will wait that below with our IO control. */ - folio = f2fs_filemap_get_folio(mapping, index, - FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS, - mapping_gfp_mask(mapping)); + fgp |= fgf_set_order(len); + folio = __filemap_get_folio(mapping, index, fgp, + mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; @@ -4002,7 +4184,7 @@ static int f2fs_write_begin(const struct kiocb *iocb, if (f2fs_is_atomic_file(inode)) err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); - else + else if (!folio_test_large(folio)) err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) @@ -4023,6 +4205,14 @@ static int f2fs_write_begin(const struct kiocb *iocb, f2fs_folio_wait_writeback(folio, DATA, false, true); + if (folio_test_large(folio)) { + err = prepare_large_folio_write_begin(inode, mapping, folio, + pos, len); + if (!err) + return 0; + goto put_folio; + } + if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; @@ -4076,15 +4266,19 @@ static int f2fs_write_end(const struct kiocb *iocb, trace_f2fs_write_end(inode, pos, len, copied); /* - * This should be come from len == PAGE_SIZE, and we expect copied - * should be PAGE_SIZE. Otherwise, we treat it with zero copied and - * let generic_perform_write() try to copy data again through copied=0. + * If a short copy happens on a folio that isn't uptodate, we treat + * it with zero copied and let generic_perform_write() try to copy + * data again through copied=0. 
*/ if (!folio_test_uptodate(folio)) { - if (unlikely(copied != len)) + if (unlikely(copied != len)) { copied = 0; - else + } else if (folio_test_large(folio)) { + ffs_mark_subrange_uptodate(folio, + offset_in_folio(folio, pos), len); + } else { folio_mark_uptodate(folio); + } } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -4103,6 +4297,9 @@ static int f2fs_write_end(const struct kiocb *iocb, if (!copied) goto unlock_out; + if (folio_test_large(folio)) + ffs_mark_subrange_dirty(folio, offset_in_folio(folio, pos), + copied); folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dd262eb41777..ac71d0d22a81 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4253,6 +4253,8 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct writeback_control *wbc, enum iostat_type io_type, int compr_blocks, bool allow_balance); +bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index); +void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len); void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 633e9ade654f..eb8e237f3dad 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -5048,9 +5048,20 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, return ret; } - /* Do not preallocate blocks that will be written partially in 4KB. */ - map.m_lblk = F2FS_BLK_ALIGN(pos); - map.m_len = F2FS_BYTES_TO_BLK(pos + count); + if (mapping_large_folio_support(inode->i_mapping)) { + /* + * Preallocate all blocks touched by a large-folio buffered write so + * the regular write_begin path does not need to unlock the folio for + * f2fs_balance_fs(). Rechecking large-folio state after unlock is + * unreliable since partial truncation can split the folio. 
+ */ + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_len = F2FS_BLK_ALIGN(pos + count); + } else { + /* Do not preallocate blocks that will be written partially in 4KB. */ + map.m_lblk = F2FS_BLK_ALIGN(pos); + map.m_len = F2FS_BYTES_TO_BLK(pos + count); + } if (map.m_len > map.m_lblk) map.m_len -= map.m_lblk; else -- 2.34.1 _______________________________________________ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
