To avoid the complexity of unlocking a large folio in write_begin, preallocate partial blocks for inodes that can use large folios. During write_begin, read only the partial head and tail 4K subpages that need read-before-write, and skip read I/O for the full middle subpages covered by the write.
Signed-off-by: Nanzhe Zhao <[email protected]> --- fs/f2fs/data.c | 284 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 17 ++- 3 files changed, 288 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a2bb6d982df..ad24a0e2da5f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2575,6 +2575,139 @@ static void ffs_detach_free(struct folio *folio) kfree(ffs); } +bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index) +{ + struct f2fs_folio_state *ffs; + size_t offset; + unsigned int idx; + + if (!folio_has_ffs(folio)) + return folio_test_uptodate(folio); + + ffs = (struct f2fs_folio_state *)folio->private; + offset = offset_in_folio(folio, (loff_t)index << PAGE_SHIFT); + idx = offset >> PAGE_SHIFT; + return test_bit(idx, ffs->state); +} + +static bool __ffs_mark_subrange_uptodate(struct folio *folio, + struct f2fs_folio_state *ffs, size_t offset, size_t len) +{ + unsigned int nr_subpages = folio_nr_pages(folio); + unsigned int start, end; + + start = offset >> PAGE_SHIFT; + end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + end = min(end, nr_subpages); + + bitmap_set(ffs->state, start, end - start); + return bitmap_full(ffs->state, nr_subpages); +} + +static void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, + size_t len) +{ + struct f2fs_folio_state *ffs; + unsigned long flags; + bool mark_uptodate = false; + + if (!folio_has_ffs(folio)) { + folio_mark_uptodate(folio); + return; + } + + ffs = (struct f2fs_folio_state *)folio->private; + spin_lock_irqsave(&ffs->state_lock, flags); + mark_uptodate = __ffs_mark_subrange_uptodate(folio, ffs, offset, len) && + !ffs->read_pages_pending; + spin_unlock_irqrestore(&ffs->state_lock, flags); + if (mark_uptodate) + folio_mark_uptodate(folio); +} + +static void ffs_mark_subrange_dirty(struct folio *folio, + size_t offset, size_t len) +{ + struct f2fs_folio_state *ffs; + unsigned int nr_subpages, start, end; + unsigned long flags; + + if (!folio_has_ffs(folio)) + return; + + ffs = (struct f2fs_folio_state *)folio->private; + nr_subpages = folio_nr_pages(folio); + start = offset >> PAGE_SHIFT; + end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + end = min(end, nr_subpages); + + spin_lock_irqsave(&ffs->state_lock, flags); + bitmap_set(ffs->state, nr_subpages + start, end - start); + spin_unlock_irqrestore(&ffs->state_lock, flags); +} + +static bool f2fs_find_next_need_read_block(const struct folio *folio, + size_t orig_off, size_t *need_off, + size_t len) +{ + size_t start = orig_off; + size_t end = start + len; + size_t head, tail; + pgoff_t index; + + if (start & (PAGE_SIZE - 1)) { + head = round_down(start, PAGE_SIZE); + index = folio->index + (head >> PAGE_SHIFT); + if (!ffs_test_blk_uptodate(folio, index)) { + *need_off = head; + return true; + } + } + + if (end & (PAGE_SIZE - 1)) { + tail = round_down(end - 1, PAGE_SIZE); + index = folio->index + (tail >> PAGE_SHIFT); + if (!ffs_test_blk_uptodate(folio, index)) { + *need_off = tail; + return true; + } + } + + return false; +} + +static int f2fs_prealloc_large_folio_write_blocks(struct inode *inode, + loff_t pos, + unsigned int len) +{ + struct f2fs_map_blocks map; + block_t start = F2FS_BYTES_TO_BLK(pos); + block_t end = F2FS_BLK_ALIGN(pos + len); + int ret; + + while (start < end) { + memset(&map, 0, sizeof(map)); + + map.m_lblk = start; + map.m_len = end - start; + map.m_seg_type = NO_CHECK_TYPE; + + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; + + ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); + if (ret) + return ret; + + if (!map.m_len) + return -ENODATA; + + start += map.m_len; + } + + return 0; +} + static int f2fs_read_data_large_folio(struct inode *inode, struct fsverity_info *vi, struct readahead_control *rac, struct folio *folio) @@ -3966,6 +4099,105 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, return 0; } +static int prepare_large_folio_write_begin(struct inode *inode, + struct folio *folio, loff_t pos, + unsigned int len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_folio_state *ffs; + size_t ori_off = offset_in_folio(folio, pos); + size_t need_off = ori_off; + int err = 0; + + len = min_t(unsigned int, len, folio_size(folio) - ori_off); + if (!is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) { + err = f2fs_prealloc_large_folio_write_blocks(inode, pos, len); + if (err) + return err; + } + + if (folio_test_uptodate(folio) || len == folio_size(folio)) + return 0; + + ffs = ffs_find_or_alloc(folio); + if (!ffs) + return -ENOMEM; + + /* Inline data must have been converted before reaching here. */ + if (WARN_ON_ONCE(f2fs_has_inline_data(inode))) + return -EINVAL; + + while (f2fs_find_next_need_read_block(folio, ori_off, + &need_off, len)) { + struct dnode_of_data dn; + pgoff_t index = folio->index + (need_off >> PAGE_SHIFT); + size_t off = offset_in_folio(folio, index << PAGE_SHIFT); + block_t blkaddr; + bool get_dn = false; + sector_t sector; + struct block_device *bdev; + struct bio *bio; + + if (!f2fs_lookup_read_extent_cache_block(inode, index, + &blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) + return -ENODATA; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + return err; + get_dn = true; + blkaddr = dn.data_blkaddr; + } + + if (blkaddr == NULL_ADDR) { + err = -EFSCORRUPTED; + goto out; + } + + if (blkaddr == NEW_ADDR) { + folio_zero_segment(folio, off, off + PAGE_SIZE); + ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE); + goto out; + } + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFSCORRUPTED; + goto out; + } + + f2fs_wait_on_block_writeback(inode, blkaddr); + bdev = f2fs_target_device(sbi, blkaddr, §or); + + bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC, + GFP_NOIO, &f2fs_bioset); + bio->bi_iter.bi_sector = sector; + f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS); + + if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) { + bio_put(bio); + err = -EIO; + goto out; + } + + err = submit_bio_wait(bio); + bio_put(bio); + if (err) + goto out; + + ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE); +out: + if (get_dn) + f2fs_put_dnode(&dn); + if (err) + return err; + } + + return 0; +} + static int f2fs_write_begin(const struct kiocb *iocb, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, @@ -3977,6 +4209,7 @@ static int f2fs_write_begin(const struct kiocb *iocb, pgoff_t index = pos >> PAGE_SHIFT; bool need_balance = false; block_t blkaddr = NULL_ADDR; + fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS; int err = 0; trace_f2fs_write_begin(inode, pos, len); @@ -4024,9 +4257,9 @@ static int f2fs_write_begin(const struct kiocb *iocb, * Do not use FGP_STABLE to avoid deadlock. * Will wait that below with our IO control. */ - folio = f2fs_filemap_get_folio(mapping, index, - FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS, - mapping_gfp_mask(mapping)); + fgp |= fgf_set_order(len); + folio = __filemap_get_folio(mapping, index, fgp, + mapping_gfp_mask(mapping)); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; @@ -4039,7 +4272,7 @@ static int f2fs_write_begin(const struct kiocb *iocb, if (f2fs_is_atomic_file(inode)) err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); - else + else if (!folio_test_large(folio)) err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) @@ -4060,6 +4293,14 @@ static int f2fs_write_begin(const struct kiocb *iocb, f2fs_folio_wait_writeback(folio, DATA, false, true); + if (folio_test_large(folio)) { + err = prepare_large_folio_write_begin(inode, + folio, pos, len); + if (!err) + return 0; + goto put_folio; + } + if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; @@ -4120,15 +4361,19 @@ static int f2fs_write_end(const struct kiocb *iocb, trace_f2fs_write_end(inode, pos, len, copied); /* - * This should be come from len == PAGE_SIZE, and we expect copied - * should be PAGE_SIZE. Otherwise, we treat it with zero copied and - * let generic_perform_write() try to copy data again through copied=0. + * If a short copy happens on a folio that isn't uptodate, we treat + * it with zero copied and let generic_perform_write() try to copy + * data again through copied=0. */ if (!folio_test_uptodate(folio)) { - if (unlikely(copied != len)) + if (unlikely(copied != len)) { copied = 0; - else + } else if (folio_test_large(folio)) { + ffs_mark_subrange_uptodate(folio, + offset_in_folio(folio, pos), len); + } else { folio_mark_uptodate(folio); + } } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -4147,6 +4392,9 @@ static int f2fs_write_end(const struct kiocb *iocb, if (!copied) goto unlock_out; + if (folio_test_large(folio)) + ffs_mark_subrange_dirty(folio, offset_in_folio(folio, pos), + copied); folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) @@ -4209,8 +4457,22 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping, trace_f2fs_set_page_dirty(folio, DATA); - if (!folio_test_uptodate(folio)) - folio_mark_uptodate(folio); + if (!folio_test_uptodate(folio)) { + bool uptodate = true; + + if (folio_has_ffs(folio)) { + struct f2fs_folio_state *ffs = + (struct f2fs_folio_state *)folio->private; + unsigned long flags; + + spin_lock_irqsave(&ffs->state_lock, flags); + uptodate = bitmap_full(ffs->state, folio_nr_pages(folio)) && + !ffs->read_pages_pending; + spin_unlock_irqrestore(&ffs->state_lock, flags); + } + if (uptodate) + folio_mark_uptodate(folio); + } BUG_ON(folio_test_swapcache(folio)); if (filemap_dirty_folio(mapping, folio)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4c2902abe499..2443fa3647d2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4255,6 +4255,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct writeback_control *wbc, enum iostat_type io_type, int compr_blocks, bool allow_balance); +bool ffs_test_blk_uptodate(const struct folio *folio, pgoff_t index); void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8acdd94272a0..1dddd4b04770 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -5046,9 +5046,20 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, return ret; } - /* Do not preallocate blocks that will be written partially in 4KB. */ - map.m_lblk = F2FS_BLK_ALIGN(pos); - map.m_len = F2FS_BYTES_TO_BLK(pos + count); + if (mapping_large_folio_support(inode->i_mapping)) { + /* + * Preallocate all blocks touched by a large-folio buffered write so + * the regular write_begin path does not need to unlock the folio for + * f2fs_balance_fs(). Rechecking large-folio state after unlock is + * unreliable since partial truncation can split the folio. + */ + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_len = F2FS_BLK_ALIGN(pos + count); + } else { + /* Do not preallocate blocks that will be written partially in 4KB. */ + map.m_lblk = F2FS_BLK_ALIGN(pos); + map.m_len = F2FS_BYTES_TO_BLK(pos + count); + } if (map.m_len > map.m_lblk) map.m_len -= map.m_lblk; else -- 2.34.1 _______________________________________________ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
