On 11/21, Chao Yu wrote:
> On 11/21/2025 7:54 AM, Jaegeuk Kim via Linux-f2fs-devel wrote:
> > This patch enables large folios for the limited case where we can get a
> > high-order memory allocation. It supports encrypted and fsverity files,
> > which are essential for the Android environment.
> > 
> > How to test:
> > - dd if=/dev/zero of=/mnt/test/test bs=1G count=4
> > - f2fs_io setflags immutable /mnt/test/test
> > - echo 3 > /proc/sys/vm/drop_caches
> >   : to reload inode with large folio
> > - f2fs_io read 32 0 1024 mmap 0 0 /mnt/test/test
> > 
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> >   fs/f2fs/data.c  | 245 ++++++++++++++++++++++++++++++++++++++++++++++--
> >   fs/f2fs/f2fs.h  |  16 ++++
> >   fs/f2fs/inode.c |   6 +-
> >   3 files changed, 257 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 48c20386f031..8f433677c49d 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -31,9 +31,15 @@
> >   static struct kmem_cache *bio_post_read_ctx_cache;
> >   static struct kmem_cache *bio_entry_slab;
> > +static struct kmem_cache *ffs_entry_slab;
> >   static mempool_t *bio_post_read_ctx_pool;
> >   static struct bio_set f2fs_bioset;
> > +struct f2fs_folio_state {
> > +   spinlock_t              state_lock;
> > +   unsigned int            read_pages_pending;
> > +};
> > +
> >   #define   F2FS_BIO_POOL_SIZE      NR_CURSEG_TYPE
> >   int __init f2fs_init_bioset(void)
> > @@ -138,11 +144,15 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
> >   {
> >     struct folio_iter fi;
> >     struct bio_post_read_ctx *ctx = bio->bi_private;
> > +   unsigned long flags;
> >     bio_for_each_folio_all(fi, bio) {
> >             struct folio *folio = fi.folio;
> > +           unsigned nr_pages = fi.length >> PAGE_SHIFT;
> > +           bool finished = true;
> > -           if (f2fs_is_compressed_page(folio)) {
> > +           if (!folio_test_large(folio) &&
> > +               f2fs_is_compressed_page(folio)) {
> >                     if (ctx && !ctx->decompression_attempted)
> >                             f2fs_end_read_compressed_page(folio, true, 0,
> >                                                     in_task);
> > @@ -156,8 +166,20 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
> >                             bio->bi_status = BLK_STS_IOERR;
> >             }
> > -           dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
> > -           folio_end_read(folio, bio->bi_status == BLK_STS_OK);
> > +           if (folio_test_large(folio)) {
> > +                   struct f2fs_folio_state *ffs = folio->private;
> > +
> > +                   spin_lock_irqsave(&ffs->state_lock, flags);
> > +                   ffs->read_pages_pending -= nr_pages;
> > +                   finished = !ffs->read_pages_pending;
> > +                   spin_unlock_irqrestore(&ffs->state_lock, flags);
> > +           }
> > +
> > +           while (nr_pages--)
> > +                   dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
> > +
> > +           if (finished)
> > +                   folio_end_read(folio, bio->bi_status == BLK_STS_OK);
> >     }
> >     if (ctx)
> > @@ -518,6 +540,9 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
> >   void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> >                              enum page_type type)
> >   {
> > +   if (!bio)
> > +           return;
> > +
> >     WARN_ON_ONCE(!is_read_io(bio_op(bio)));
> >     trace_f2fs_submit_read_bio(sbi->sb, type, bio);
> > @@ -1209,11 +1234,21 @@ struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index,
> >     struct dnode_of_data dn;
> >     struct folio *folio;
> >     int err;
> > -
> > +retry:
> >     folio = f2fs_grab_cache_folio(mapping, index, for_write);
> >     if (IS_ERR(folio))
> >             return folio;
> > +   if (folio_test_large(folio)) {
> > +           pgoff_t folio_index = mapping_align_index(mapping, index);
> > +
> > +           f2fs_folio_put(folio, true);
> > +           invalidate_inode_pages2_range(mapping, folio_index,
> > +                           folio_index + folio_nr_pages(folio) - 1);
> > +           f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT);
> > +           goto retry;
> > +   }
> 
> Do we need to move the above check into f2fs_grab_cache_folio()? We call
> f2fs_grab_cache_folio() in a lot of places.

We're okay with high-order allocation in the other paths; I think this is
the only problematic case, since it goes through GC writes.
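
For reference, if we did centralize it, the untested sketch below is
roughly what I'd imagine (the helper name is just illustrative); note
that it records the folio geometry before dropping the reference:

	static struct folio *f2fs_grab_order0_folio(struct address_space *mapping,
						pgoff_t index, bool for_write)
	{
		struct folio *folio;
		pgoff_t start, end;
	retry:
		folio = f2fs_grab_cache_folio(mapping, index, for_write);
		if (IS_ERR(folio) || !folio_test_large(folio))
			return folio;

		/* note the range this large folio covers before the put */
		start = mapping_align_index(mapping, index);
		end = start + folio_nr_pages(folio) - 1;

		f2fs_folio_put(folio, true);
		invalidate_inode_pages2_range(mapping, start, end);
		f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT);
		goto retry;
	}

That said, it would make every caller pay for the retry loop, so keeping
it local to this path looks fine to me.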

> 
> > +
> >     if (f2fs_lookup_read_extent_cache_block(inode, index,
> >                                             &dn.data_blkaddr)) {
> >             if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
> > @@ -2341,6 +2376,177 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> >   }
> >   #endif
> > +static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
> > +{
> > +   struct f2fs_folio_state *ffs = folio->private;
> > +
> > +   if (ffs)
> > +           return ffs;
> > +
> > +   ffs = f2fs_kmem_cache_alloc(ffs_entry_slab, GFP_NOIO, true, NULL);
> > +
> > +   spin_lock_init(&ffs->state_lock);
> > +   folio_attach_private(folio, ffs);
> > +   return ffs;
> > +}
> > +
> > +static void ffs_detach_free(struct folio *folio)
> > +{
> > +   struct f2fs_folio_state *ffs;
> > +
> > +   if (!folio_test_large(folio)) {
> > +           folio_detach_private(folio);
> > +           return;
> > +   }
> > +
> > +   ffs = folio_detach_private(folio);
> > +   if (!ffs)
> > +           return;
> > +
> > +   WARN_ON_ONCE(ffs->read_pages_pending != 0);
> > +   kmem_cache_free(ffs_entry_slab, ffs);
> > +}
> > +
> > +static int f2fs_read_data_large_folio(struct inode *inode,
> > +           struct readahead_control *rac, struct folio *folio)
> > +{
> > +   struct bio *bio = NULL;
> > +   sector_t last_block_in_bio = 0;
> > +   struct f2fs_map_blocks map;
> > +   pgoff_t index, offset;
> > +   unsigned max_nr_pages = rac ? readahead_count(rac) :
> > +                           folio_nr_pages(folio);
> > +   unsigned nrpages;
> > +   struct f2fs_folio_state *ffs;
> > +   int ret = 0;
> > +
> > +   if (f2fs_compressed_file(inode))
> > +           return -EOPNOTSUPP;
> 
> if (!IS_IMMUTABLE(inode))
>       return -EOPNOTSUPP;
> 
> We can configure the inode after this check? Can we add some sanity check
> to prevent enabling compress/immutable/quota if the inode has already
> enabled large folios?

I think the immutable flag will prevent most of those changes?
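
If we want an explicit guard anyway, a minimal sketch could be to bail
out of the flag-change path once the mapping allows large folios; note
that the hook point in f2fs_setflags_common() is only my assumption:

	/* sketch: refuse compress/immutable changes on large-folio inodes */
	if (mapping_large_folio_support(inode->i_mapping) &&
	    ((iflags ^ fi->i_flags) & (F2FS_COMPR_FL | F2FS_IMMUTABLE_FL)))
		return -EOPNOTSUPP;

Quota files are set up at mount time, so I don't expect them to change
after f2fs_iget() has made the large-folio decision.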

> 
> > +
> > +   memset(&map, 0, sizeof(map));
> 
> Can be replaced w/ struct f2fs_map_blocks map = {0, };
> 
> > +   map.m_seg_type = NO_CHECK_TYPE;
> > +
> > +   if (rac)
> > +           folio = readahead_folio(rac);
> > +next_folio:
> > +   if (!folio)
> > +           goto out;
> > +
> > +   index = folio->index;
> > +   offset = 0;
> > +   ffs = NULL;
> > +   nrpages = folio_nr_pages(folio);
> > +
> > +   for (; nrpages; nrpages--) {
> > +           sector_t block_nr;
> > +           /*
> > +            * Map blocks using the previous result first.
> > +            */
> > +           if ((map.m_flags & F2FS_MAP_MAPPED) &&
> > +                           index > map.m_lblk &&
> > +                           index < (map.m_lblk + map.m_len))
> > +                   goto got_it;
> > +
> > +           /*
> > +            * Then do more f2fs_map_blocks() calls until we are
> > +            * done with this page.
> > +            */
> > +           memset(&map, 0, sizeof(map));
> > +           map.m_seg_type = NO_CHECK_TYPE;
> > +           map.m_lblk = index;
> > +           map.m_len = max_nr_pages;
> > +
> > +           ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
> > +           if (ret)
> > +                   goto err_out;
> > +got_it:
> > +           if ((map.m_flags & F2FS_MAP_MAPPED)) {
> > +                   block_nr = map.m_pblk + index - map.m_lblk;
> > +                   if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
> > +                                           DATA_GENERIC_ENHANCE_READ)) {
> > +                           ret = -EFSCORRUPTED;
> > +                           goto err_out;
> > +                   }
> > +           } else {
> > +                   folio_zero_range(folio, offset << PAGE_SHIFT, PAGE_SIZE);
> > +                   if (f2fs_need_verity(inode, index) &&
> > +                       !fsverity_verify_page(folio_file_page(folio,
> > +                                                           index))) {
> > +                           ret = -EIO;
> > +                           goto err_out;
> > +                   }
> > +                   continue;
> > +           }
> > +
> > +           /*
> > +            * This page will go to BIO.  Do we need to send this
> > +            * BIO off first?
> > +            */
> > +           if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
> > +                                           last_block_in_bio, block_nr) ||
> > +                   !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
> > +submit_and_realloc:
> > +                   f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
> > +                   bio = NULL;
> > +           }
> > +           if (bio == NULL)
> > +                   bio = f2fs_grab_read_bio(inode, block_nr,
> > +                                   max_nr_pages,
> > +                                   f2fs_ra_op_flags(rac),
> > +                                   index, false);
> > +
> > +           /*
> > +            * If the page is under writeback, we need to wait for
> > +            * its completion to see the correct decrypted data.
> > +            */
> > +           f2fs_wait_on_block_writeback(inode, block_nr);
> > +
> > +           if (!bio_add_folio(bio, folio, F2FS_BLKSIZE,
> > +                                   offset << PAGE_SHIFT))
> > +                   goto submit_and_realloc;
> > +
> > +           if (folio_test_large(folio)) {
> > +                   ffs = ffs_find_or_alloc(folio);
> > +
> > +                   /* count pending reads for this folio */
> > +                   spin_lock_irq(&ffs->state_lock);
> > +                   ffs->read_pages_pending++;
> > +                   spin_unlock_irq(&ffs->state_lock);
> > +           }
> > +
> > +           inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
> > +           f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
> > +                           F2FS_BLKSIZE);
> > +           last_block_in_bio = block_nr;
> > +           index++;
> > +           offset++;
> > +   }
> > +   if (rac) {
> > +           folio = readahead_folio(rac);
> > +           goto next_folio;
> > +   }
> > +err_out:
> > +   /* Nothing was submitted. */
> > +   if (!bio) {
> > +           if (!ret)
> > +                   folio_mark_uptodate(folio);
> > +           folio_unlock(folio);
> > +           return ret;
> > +   }
> > +
> > +   if (ret) {
> > +           f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
> > +
> > +           /* Wait bios and clear uptodate. */
> > +           folio_lock(folio);
> > +           folio_clear_uptodate(folio);
> > +           folio_unlock(folio);
> > +   }
> > +out:
> > +   f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
> > +   return ret;
> > +}
> > +
> >   /*
> >    * This function was originally taken from fs/mpage.c, and customized for f2fs.
> >    * Major change was from block_size == page_size in f2fs by default.
> > @@ -2366,9 +2572,13 @@ static int f2fs_mpage_readpages(struct inode *inode,
> >     pgoff_t index;
> >   #endif
> >     unsigned nr_pages = rac ? readahead_count(rac) : 1;
> > +   struct address_space *mapping = rac ? rac->mapping : folio->mapping;
> >     unsigned max_nr_pages = nr_pages;
> >     int ret = 0;
> > +   if (mapping_large_folio_support(mapping))
> > +           return f2fs_read_data_large_folio(inode, rac, folio);
> > +
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> >     if (f2fs_compressed_file(inode)) {
> >             index = rac ? readahead_index(rac) : folio->index;
> > @@ -2459,8 +2669,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> >             }
> >   #endif
> >     }
> > -   if (bio)
> > -           f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
> > +   f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
> >     return ret;
> >   }
> > @@ -3747,7 +3956,12 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
> >                     f2fs_remove_dirty_inode(inode);
> >             }
> >     }
> > -   folio_detach_private(folio);
> > +
> > +   if (offset || length != folio_size(folio))
> > +           return;
> > +
> > +   folio_cancel_dirty(folio);
> > +   ffs_detach_free(folio);
> >   }
> >   bool f2fs_release_folio(struct folio *folio, gfp_t wait)
> > @@ -3756,7 +3970,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait)
> >     if (folio_test_dirty(folio))
> >             return false;
> > -   folio_detach_private(folio);
> > +   ffs_detach_free(folio);
> >     return true;
> >   }
> > @@ -4162,12 +4376,25 @@ int __init f2fs_init_bio_entry_cache(void)
> >   {
> >     bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
> >                     sizeof(struct bio_entry));
> > -   return bio_entry_slab ? 0 : -ENOMEM;
> > +
> > +   if (!bio_entry_slab)
> > +           return -ENOMEM;
> > +
> > +   ffs_entry_slab = f2fs_kmem_cache_create("f2fs_ffs_slab",
> > +                   sizeof(struct f2fs_folio_state));
> > +
> > +   if (!ffs_entry_slab) {
> > +           kmem_cache_destroy(bio_entry_slab);
> > +           return -ENOMEM;
> > +   }
> > +
> > +   return 0;
> >   }
> >   void f2fs_destroy_bio_entry_cache(void)
> >   {
> >     kmem_cache_destroy(bio_entry_slab);
> > +   kmem_cache_destroy(ffs_entry_slab);
> >   }
> >   static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index dffe8958b580..3340db04a7c2 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -4916,6 +4916,22 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
> >     return false;
> >   }
> > +static inline bool f2fs_quota_file(struct inode *inode)
> > +{
> > +#ifdef CONFIG_QUOTA
> > +   int i;
> > +
> > +   if (!f2fs_sb_has_quota_ino(F2FS_I_SB(inode)))
> > +           return false;
> > +
> > +   for (i = 0; i < MAXQUOTAS; i++) {
> > +           if (f2fs_qf_ino(F2FS_I_SB(inode)->sb, i) == inode->i_ino)
> > +                   return true;
> > +   }
> > +#endif
> > +   return false;
> > +}
> > +
> >   static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi)
> >   {
> >     return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK;
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index e2405b79b3cc..9162154d5211 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -597,6 +597,8 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
> >     if (ret)
> >             goto bad_inode;
> >   make_now:
> > +   f2fs_set_inode_flags(inode);
> > +
> >     if (ino == F2FS_NODE_INO(sbi)) {
> >             inode->i_mapping->a_ops = &f2fs_node_aops;
> >             mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> > @@ -618,6 +620,9 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
> >             inode->i_op = &f2fs_file_inode_operations;
> >             inode->i_fop = &f2fs_file_operations;
> >             inode->i_mapping->a_ops = &f2fs_dblock_aops;
> > +           if (IS_IMMUTABLE(inode) && !f2fs_compressed_file(inode) &&
> > +               !f2fs_quota_file(inode))
> > +                   mapping_set_folio_min_order(inode->i_mapping, 0);
> >     } else if (S_ISDIR(inode->i_mode)) {
> >             inode->i_op = &f2fs_dir_inode_operations;
> >             inode->i_fop = &f2fs_dir_operations;
> > @@ -638,7 +643,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
> >             ret = -EIO;
> >             goto bad_inode;
> >     }
> > -   f2fs_set_inode_flags(inode);
> >     unlock_new_inode(inode);
> >     trace_f2fs_iget(inode);

