This patch adds support for buffered writes using large folios for regular files, with the exception of atomic, inline, and compressed files.

We introduce a state bitmap to track the uptodate/dirty state of each sub-page within a folio. Because f2fs_folio_state is now variably sized, it is allocated with kzalloc() for now; better ideas are appreciated.
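
As a reviewer aid, here is a minimal userspace sketch (illustration only, not
part of the patch; the sketch_* names and SKETCH_BITS_PER_LONG are made up) of
how the two halves of state[] are indexed: bits [0, nr_subpages) track
uptodate and bits [nr_subpages, 2 * nr_subpages) track dirty, mirroring what
ffs_find_or_alloc() initializes below.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define SKETCH_BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

/* Illustration only: test one bit, like test_bit() on ffs->state. */
static bool sketch_test_bit(const unsigned long *state, unsigned int bit)
{
	return (state[bit / SKETCH_BITS_PER_LONG] >>
		(bit % SKETCH_BITS_PER_LONG)) & 1;
}

int main(void)
{
	unsigned long state[1] = { 0 };		/* room for 2 * 4 bits */
	unsigned int nr_subpages = 4;		/* e.g. an order-2 folio */

	state[0] |= 1UL << 1;			/* sub-page 1 uptodate */
	state[0] |= 1UL << (nr_subpages + 1);	/* sub-page 1 dirty */

	printf("uptodate=%d dirty=%d\n",
	       sketch_test_bit(state, 1),		  /* uptodate half */
	       sketch_test_bit(state, nr_subpages + 1));  /* dirty half */
	return 0;
}
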
Signed-off-by: Nanzhe Zhao <[email protected]>
---
 fs/f2fs/data.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/f2fs/f2fs.h |   1 +
 fs/f2fs/file.c |   4 ----
 3 files changed, 210 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a40db6894fc..3aaf20824205 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2414,14 +2414,26 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
 {
 	struct f2fs_folio_state *ffs = folio->private;
+	unsigned int nr_subpages;
 
 	if (ffs)
 		return ffs;
 
-	ffs = f2fs_kmem_cache_alloc(ffs_entry_slab,
-			GFP_NOIO | __GFP_ZERO, true, NULL);
+	nr_subpages = folio_size(folio) >> PAGE_SHIFT;
+	if (nr_subpages <= 1)
+		return NULL;
+
+	ffs = kzalloc(struct_size(ffs, state, BITS_TO_LONGS(2 * nr_subpages)),
+		      GFP_NOIO);
+	if (!ffs)
+		return NULL;
 	spin_lock_init(&ffs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(ffs->state, 0, nr_subpages);
+	if (folio_test_dirty(folio))
+		bitmap_set(ffs->state, nr_subpages, nr_subpages);
+
 	folio_attach_private(folio, ffs);
 	return ffs;
 }
@@ -2440,7 +2452,177 @@ static void ffs_detach_free(struct folio *folio)
 		return;
 
 	WARN_ON_ONCE(ffs->read_pages_pending != 0);
-	kmem_cache_free(ffs_entry_slab, ffs);
+	kfree(ffs);
+}
+
+static inline unsigned int ffs_nr_subpages(const struct folio *folio)
+{
+	return folio_size(folio) >> PAGE_SHIFT;
+}
+
+static inline bool ffs_subpage_is_uptodate(struct f2fs_folio_state *ffs,
+				const struct folio *folio, size_t offset)
+{
+	unsigned int idx = offset >> PAGE_SHIFT;
+
+	if (!ffs)
+		return false;
+
+	if (idx >= ffs_nr_subpages(folio))
+		return false;
+
+	return test_bit(idx, ffs->state);
+}
+
+/* Must be called while holding the folio lock. */
+static inline void ffs_mark_subrange_uptodate(struct folio *folio, size_t offset, size_t len)
+{
+	struct f2fs_folio_state *ffs = folio->private;
+	unsigned int nr_subpages = ffs_nr_subpages(folio);
+	unsigned int start, end;
+
+	if (!ffs) {
+		folio_mark_uptodate(folio);
+		return;
+	}
+
+	start = offset >> PAGE_SHIFT;
+	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	end = min(end, nr_subpages);
+
+	bitmap_set(ffs->state, start, end - start);
+	if (bitmap_full(ffs->state, nr_subpages))
+		folio_mark_uptodate(folio);
+}
+
+/*
+ * During buffered writes, we may only need to read the first and last
+ * page of a folio.
+ */
+static bool f2fs_find_next_need_read_block(struct f2fs_folio_state *ffs,
+					const struct folio *folio,
+					size_t orig_off, size_t *need_off,
+					size_t len)
+{
+	size_t start = orig_off;
+	size_t end = start + len;
+	size_t head, tail;
+
+	if (start & (PAGE_SIZE - 1)) {
+		head = round_down(start, PAGE_SIZE);
+		if (!ffs_subpage_is_uptodate(ffs, folio, head)) {
+			*need_off = head;
+			return true;
+		}
+	}
+
+	if (end & (PAGE_SIZE - 1)) {
+		tail = round_down(end - 1, PAGE_SIZE);
+		if (!ffs_subpage_is_uptodate(ffs, folio, tail)) {
+			*need_off = tail;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static int prepare_large_folio_write_begin(struct inode *inode,
+				struct address_space *mapping,
+				struct folio *folio, loff_t pos,
+				unsigned int len)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_folio_state *ffs;
+	struct dnode_of_data dn;
+	size_t ori_off = offset_in_folio(folio, pos);
+	size_t need_off = ori_off;
+	pgoff_t index;
+	bool get_dn;
+	int err = 0;
+	sector_t sector;
+	struct block_device *bdev;
+	struct bio *bio;
+
+	len = min_t(unsigned int, len, folio_size(folio) - ori_off);
+	if (folio_test_uptodate(folio) || len == folio_size(folio))
+		return 0;
+
+	ffs = ffs_find_or_alloc(folio);
+	if (!ffs)
+		return 0;
+
+	/* Inline data must have been converted before reaching here. */
+	if (WARN_ON_ONCE(f2fs_has_inline_data(inode)))
+		return -EINVAL;
+
+	while (f2fs_find_next_need_read_block(ffs, folio, ori_off, &need_off, len)) {
+		size_t off;
+
+		index = folio->index + (need_off >> PAGE_SHIFT);
+		get_dn = false;
+
+		if (!f2fs_lookup_read_extent_cache_block(inode, index,
+							&dn.data_blkaddr)) {
+			if (IS_DEVICE_ALIASING(inode))
+				return -ENODATA;
+
+			set_new_dnode(&dn, inode, NULL, NULL, 0);
+			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+			get_dn = true;
+			if (err)
+				goto out;
+		}
+
+		if (dn.data_blkaddr == NULL_ADDR) {
+			err = -EFSCORRUPTED;
+			goto out;
+		}
+
+		off = offset_in_folio(folio, index << PAGE_SHIFT);
+
+		if (dn.data_blkaddr == NEW_ADDR) {
+			folio_zero_segment(folio, off, off + PAGE_SIZE);
+			ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+			goto out;
+		}
+
+		if (!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+					DATA_GENERIC_ENHANCE_READ)) {
+			err = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Submit a synchronous read for this subpage. */
+		f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+		bdev = f2fs_target_device(sbi, dn.data_blkaddr, &sector);
+
+		bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_SYNC,
+					GFP_NOIO, &f2fs_bioset);
+		bio->bi_iter.bi_sector = sector;
+		f2fs_set_bio_crypt_ctx(bio, inode, index, NULL, GFP_NOFS);
+
+		if (!bio_add_folio(bio, folio, PAGE_SIZE, off)) {
+			bio_put(bio);
+			err = -EIO;
+			goto out;
+		}
+
+		err = submit_bio_wait(bio);
+		bio_put(bio);
+		if (err)
+			goto out;
+
+		ffs_mark_subrange_uptodate(folio, off, PAGE_SIZE);
+
+out:
+		if (get_dn)
+			f2fs_put_dnode(&dn);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int f2fs_read_data_large_folio(struct inode *inode,
@@ -2457,7 +2639,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
 	int ret = 0;
 	bool folio_in_bio;
 
-	if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
+	if (f2fs_compressed_file(inode)) {
 		if (folio)
 			folio_unlock(folio);
 		return -EOPNOTSUPP;
@@ -3828,6 +4010,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 	bool need_balance = false;
 	bool use_cow = false;
 	block_t blkaddr = NULL_ADDR;
+	fgf_t fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT;
 	int err = 0;
 
 	trace_f2fs_write_begin(inode, pos, len);
@@ -3875,9 +4058,8 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 	 * Do not use FGP_STABLE to avoid deadlock.
 	 * Will wait that below with our IO control.
 	 */
-	folio = f2fs_filemap_get_folio(mapping, index,
-				FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
-				mapping_gfp_mask(mapping));
+	fgp |= fgf_set_order(len);
+	folio = f2fs_filemap_get_folio(mapping, index, fgp, mapping_gfp_mask(mapping));
 	if (IS_ERR(folio)) {
 		err = PTR_ERR(folio);
 		goto fail;
@@ -3890,7 +4072,7 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 	if (f2fs_is_atomic_file(inode))
 		err = prepare_atomic_write_begin(sbi, folio, pos, len,
 					&blkaddr, &need_balance, &use_cow);
-	else
+	else if (!folio_test_large(folio))
 		err = prepare_write_begin(sbi, folio, pos, len,
 					&blkaddr, &need_balance);
 	if (err)
@@ -3911,6 +4093,13 @@ static int f2fs_write_begin(const struct kiocb *iocb,
 
 	f2fs_folio_wait_writeback(folio, DATA, false, true);
 
+	if (folio_test_large(folio)) {
+		err = prepare_large_folio_write_begin(inode, mapping, folio, pos, len);
+		if (!err)
+			return 0;
+		goto put_folio;
+	}
+
 	if (len == folio_size(folio) || folio_test_uptodate(folio))
 		return 0;
 
@@ -3963,15 +4152,22 @@ static int f2fs_write_end(const struct kiocb *iocb,
 	trace_f2fs_write_end(inode, pos, len, copied);
 
 	/*
-	 * This should be come from len == PAGE_SIZE, and we expect copied
-	 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
-	 * let generic_perform_write() try to copy data again through copied=0.
+	 * If a short copy happens on a folio that is not uptodate, treat it
+	 * as zero copied and let generic_perform_write() try to copy the
+	 * data again through copied=0.
 	 */
 	if (!folio_test_uptodate(folio)) {
-		if (unlikely(copied != len))
+		if (unlikely(copied != len)) {
 			copied = 0;
-		else
+		} else if (folio_test_large(folio)) {
+			ffs_mark_subrange_uptodate(folio,
+					offset_in_folio(folio, pos), len);
+		} else {
+			/*
+			 * For order-0 folios, a full copy implies len == PAGE_SIZE.
+			 */
 			folio_mark_uptodate(folio);
+		}
 	}
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e5b8f5374666..04a6310145c4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2045,6 +2045,7 @@ struct f2fs_sb_info {
 struct f2fs_folio_state {
 	spinlock_t state_lock;
 	unsigned int read_pages_pending;
+	unsigned long state[];
 };
 
 /* Definitions to access f2fs_sb_info */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 18a9feccb1f9..9479f4d447c9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -624,10 +624,6 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
 	if (!f2fs_is_compress_backend_ready(inode))
 		return -EOPNOTSUPP;
 
-	if (mapping_large_folio_support(inode->i_mapping) &&
-	    filp->f_mode & FMODE_WRITE)
-		return -EOPNOTSUPP;
-
 	err = fsverity_file_open(inode, filp);
 	if (err)
 		return err;
-- 
2.34.1


_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
