On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > This patches adds bitmaps to represent empty or full NAT blocks containing
> > free nid entries.
> > 
> > If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> > use these bitmaps when building free nids. In order to avoid checkpointing
> > burden, up-to-date bitmaps will be flushed only during umount time. So,
> > normally we can get this gain, but when power-cut happens, we rely on 
> > fsck.f2fs
> > which recovers this bitmap again.
> > 
> > After this patch, we build free nids from nid #0 at mount time to make more
> > full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> > without loading any NAT pages from disk.
> > 
> > Signed-off-by: Jaegeuk Kim <jaeg...@kernel.org>
> > ---
> >  fs/f2fs/checkpoint.c    |  29 +++++++-
> >  fs/f2fs/debug.c         |   1 +
> >  fs/f2fs/f2fs.h          |  23 +++++-
> >  fs/f2fs/node.c          | 188 
> > +++++++++++++++++++++++++++++++++++++++++++-----
> >  fs/f2fs/segment.c       |   2 +-
> >  include/linux/f2fs_fs.h |   1 +
> >  6 files changed, 224 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 042f8d9afe44..783c5c3f16a4 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
> > *sbi, struct cp_control *cpc)
> >  
> >     spin_lock(&sbi->cp_lock);
> >  
> > +   if (ckpt->cp_pack_total_block_count >
> > +                   sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> > +           disable_nat_bits(sbi, false);
> 
> I think we need to drop nat full/empty bitmap only if there is no enough space
> in CP area while doing umount, otherwise we can keep this in memory.

Yup.

> 
> > +
> >     if (cpc->reason == CP_UMOUNT)
> >             __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> >     else
> > @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> > struct cp_control *cpc)
> >  
> >     start_blk = __start_cp_next_addr(sbi);
> >  
> > +   /* write nat bits */

...

> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   struct page *page;
> > +   unsigned int i = 0;
> > +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +   nid_t nid;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +           return -EAGAIN;
> > +
> > +   down_read(&nm_i->nat_tree_lock);
> > +check_empty:
> > +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +           i = 0;
> > +           goto check_partial;
> > +   }
> > +
> > +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> > +                                                                   nid++) {
> > +           if (unlikely(nid >= nm_i->max_nid))
> > +                   break;
> > +           add_free_nid(sbi, nid, true);
> > +   }
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +           goto out;
> > +   i++;
> > +   goto check_empty;
> > +
> > +check_partial:
> > +   i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +           disable_nat_bits(sbi, true);
> 
> Can this happen in real world? Should be a bug in somewhere?

It happens, since current design handles full_nat_bits optionally in order
to avoid scanning a whole NAT page to set it back as 1 from 0.

> 
> > +           return -EINVAL;
> > +   }
> > +
> > +   nid = i * NAT_ENTRY_PER_BLOCK;
> > +   page = get_current_nat_page(sbi, nid);
> > +   scan_nat_page(sbi, page, nid);
> > +   f2fs_put_page(page, 1);
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +           i++;
> > +           goto check_partial;
> > +   }
> > +out:
> > +   up_read(&nm_i->nat_tree_lock);
> > +   return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> > mount)
> >  {
> >     struct f2fs_nm_info *nm_i = NM_I(sbi);
> >     struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> > *sbi, bool sync)
> >     if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >             return;
> >  
> > +   /* try to find free nids with nat_bits */
> > +   if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> > +           return;
> > +
> > +   /* find next valid candidate */
> 
> This is just for mount case?

Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
NAT pages.

Thanks,

> 
> > +   if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> > +           int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> > +                                   nm_i->nat_blocks, 0);
> > +           if (idx >= nm_i->nat_blocks)
> > +                   set_sbi_flag(sbi, SBI_NEED_FSCK);
> > +           else
> > +                   nid = idx * NAT_ENTRY_PER_BLOCK;
> > +   }
> > +
> >     /* readahead nat pages to be scanned */
> >     ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >                                                     META_NAT, true);
> > @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info 
> > *sbi, bool sync)
> >                                     nm_i->ra_nid_pages, META_NAT, false);
> >  }
> >  
> > -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> > +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >  {
> >     mutex_lock(&NM_I(sbi)->build_lock);
> > -   __build_free_nids(sbi, sync);
> > +   __build_free_nids(sbi, sync, mount);
> >     mutex_unlock(&NM_I(sbi)->build_lock);
> >  }
> >  
> > @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >     spin_unlock(&nm_i->nid_list_lock);
> >  
> >     /* Let's scan nat pages and its caches to get free nids */
> > -   build_free_nids(sbi, true);
> > +   build_free_nids(sbi, true, false);
> >     goto retry;
> >  }
> >  
> > @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct 
> > nat_entry_set *nes,
> >     list_add_tail(&nes->set_list, head);
> >  }
> >  
> > +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> > +                                           struct page *page)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> > +   struct f2fs_nat_block *nat_blk = page_address(page);
> > +   int valid = 0;
> > +   int i;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +           return;
> > +
> > +   for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> > +           if (start_nid == 0 && i == 0)
> > +                   valid++;
> > +           if (nat_blk->entries[i].block_addr)
> > +                   valid++;
> > +   }
> > +   if (valid == 0) {
> > +           test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> > +           test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> 
> set_bit_le/clear_bit_le
> 
> > +           return;
> > +   }
> > +
> > +   test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> 
> ditto
> 
> > +   if (valid == NAT_ENTRY_PER_BLOCK)
> > +           test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> > +   else
> > +           test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> 
> ditto
> 
> > +}
> > +
> >  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > -                                   struct nat_entry_set *set)
> > +           struct nat_entry_set *set, struct cp_control *cpc)
> >  {
> >     struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >     struct f2fs_journal *journal = curseg->journal;
> > @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
> > *sbi,
> >      * #1, flush nat entries to journal in current hot data summary block.
> >      * #2, flush nat entries to nat page.
> >      */
> > -   if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> > +   if (cpc->reason == CP_UMOUNT ||
> 
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> 
> > +           !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >             to_journal = false;
> >  
> >     if (to_journal) {
> > @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct 
> > f2fs_sb_info *sbi,
> >             }
> >     }
> >  
> > -   if (to_journal)
> > +   if (to_journal) {
> >             up_write(&curseg->journal_rwsem);
> > -   else
> > +   } else {
> > +           __update_nat_bits(sbi, start_nid, page);
> >             f2fs_put_page(page, 1);
> > +   }
> >  
> >     f2fs_bug_on(sbi, set->entry_cnt);
> >  
> > @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
> > *sbi,
> >  /*
> >   * This function is called during the checkpointing process.
> >   */
> > -void flush_nat_entries(struct f2fs_sb_info *sbi)
> > +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> >  {
> >     struct f2fs_nm_info *nm_i = NM_I(sbi);
> >     struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >      * entries, remove all entries from journal and merge them
> >      * into nat entry set.
> >      */
> > -   if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > +   if (cpc->reason == CP_UMOUNT ||
> 
> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) ||
> 
> > +           !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> >             remove_nats_in_journal(sbi);
> >  
> >     while ((found = __gang_lookup_nat_set(nm_i,
> > @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >  
> >     /* flush dirty nats in nat entry set */
> >     list_for_each_entry_safe(set, tmp, &sets, set_list)
> > -           __flush_nat_entry_set(sbi, set);
> > +           __flush_nat_entry_set(sbi, set, cpc);
> >  
> >     up_write(&nm_i->nat_tree_lock);
> >  
> >     f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >  }
> >  
> > +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> > +   unsigned int i;
> > +   __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> 
> __u64 cp_ver = cur_cp_version(ckpt);
> 
> Thanks,
> 
> > +   size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> > +   __u64 crc = le32_to_cpu(*((__le32 *)
> > +                           ((unsigned char *)ckpt + crc_offset)));
> > +   block_t nat_bits_addr;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +           return 0;
> > +
> > +   nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
> > +                                           F2FS_BLKSIZE - 1);
> > +   nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
> > +                                           GFP_KERNEL);
> > +   if (!nm_i->nat_bits)
> > +           return -ENOMEM;
> > +
> > +   nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> > +                                           nm_i->nat_bits_blocks;
> > +   for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> > +           struct page *page = get_meta_page(sbi, nat_bits_addr++);
> > +
> > +           memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> > +                                   page_address(page), F2FS_BLKSIZE);
> > +           f2fs_put_page(page, 1);
> > +   }
> > +
> > +   cp_ver |= (crc << 32);
> > +   if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> > +           disable_nat_bits(sbi, true);
> > +           return 0;
> > +   }
> > +
> > +   nm_i->full_nat_bits = nm_i->nat_bits + 8;
> > +   nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> > +
> > +   f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> > +   return 0;
> > +}
> > +
> >  static int init_node_manager(struct f2fs_sb_info *sbi)
> >  {
> >     struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >     struct f2fs_nm_info *nm_i = NM_I(sbi);
> >     unsigned char *version_bitmap;
> > -   unsigned int nat_segs, nat_blocks;
> > +   unsigned int nat_segs;
> > +   int err;
> >  
> >     nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >  
> >     /* segment_count_nat includes pair segment so divide to 2. */
> >     nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> > -   nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > -
> > -   nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> > +   nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
> > +   nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >  
> >     /* not used nids: 0, node, meta, (and root counted as valid node) */
> >     nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
> > @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info 
> > *sbi)
> >     if (!nm_i->nat_bitmap)
> >             return -ENOMEM;
> >  
> > +   err = __get_nat_bitmaps(sbi);
> > +   if (err)
> > +           return err;
> > +
> >  #ifdef CONFIG_F2FS_CHECK_FS
> >     nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
> >                                     GFP_KERNEL);
> > @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >     if (err)
> >             return err;
> >  
> > -   build_free_nids(sbi, true);
> > +   build_free_nids(sbi, true, true);
> >     return 0;
> >  }
> >  
> > @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
> >     up_write(&nm_i->nat_tree_lock);
> >  
> >     kfree(nm_i->nat_bitmap);
> > +   kfree(nm_i->nat_bits);
> >  #ifdef CONFIG_F2FS_CHECK_FS
> >     kfree(nm_i->nat_bitmap_mir);
> >  #endif
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index df2ff5cfe8f4..8e1ec248c653 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >     if (!available_free_memory(sbi, FREE_NIDS))
> >             try_to_free_nids(sbi, MAX_FREE_NIDS);
> >     else
> > -           build_free_nids(sbi, false);
> > +           build_free_nids(sbi, false, false);
> >  
> >     if (!is_idle(sbi))
> >             return;
> > diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> > index f0748524ca8c..1c92ace2e8f8 100644
> > --- a/include/linux/f2fs_fs.h
> > +++ b/include/linux/f2fs_fs.h
> > @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >  /*
> >   * For checkpoint
> >   */
> > +#define CP_NAT_BITS_FLAG   0x00000080
> >  #define CP_CRC_RECOVERY_FLAG       0x00000040
> >  #define CP_FASTBOOT_FLAG   0x00000020
> >  #define CP_FSCK_FLAG               0x00000010
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to