On 04/29, Chao Yu wrote:
> As Yanming reported in bugzilla:
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=215916
> 
> The kernel message is shown below:
> 
> kernel BUG at fs/f2fs/segment.c:2560!
> Call Trace:
>  allocate_segment_by_default+0x228/0x440
>  f2fs_allocate_data_block+0x13d1/0x31f0
>  do_write_page+0x18d/0x710
>  f2fs_outplace_write_data+0x151/0x250
>  f2fs_do_write_data_page+0xef9/0x1980
>  move_data_page+0x6af/0xbc0
>  do_garbage_collect+0x312f/0x46f0
>  f2fs_gc+0x6b0/0x3bc0
>  f2fs_balance_fs+0x921/0x2260
>  f2fs_write_single_data_page+0x16be/0x2370
>  f2fs_write_cache_pages+0x428/0xd00
>  f2fs_write_data_pages+0x96e/0xd50
>  do_writepages+0x168/0x550
>  __writeback_single_inode+0x9f/0x870
>  writeback_sb_inodes+0x47d/0xb20
>  __writeback_inodes_wb+0xb2/0x200
>  wb_writeback+0x4bd/0x660
>  wb_workfn+0x5f3/0xab0
>  process_one_work+0x79f/0x13e0
>  worker_thread+0x89/0xf60
>  kthread+0x26a/0x300
>  ret_from_fork+0x22/0x30
> RIP: 0010:new_curseg+0xe8d/0x15f0
> 
> The root cause is: ckpt.valid_block_count is inconsistent with the SIT
> table; stat info indicates the filesystem has free blocks, but the SIT
> table indicates the filesystem has no free segment.
> 
> As a result, during garbage collection, it triggers a panic when the LFS
> allocator fails to find a free segment.
> 
> This patch tries to fix this issue by checking consistency between
> ckpt.valid_block_count and the blocks accounted from the SIT.
> 
> Cc: [email protected]
> Reported-by: Ming Yan <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
>  fs/f2fs/segment.c | 24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8c17fed8987e..eddaf3b45b25 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4462,6 +4462,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>       unsigned int readed, start_blk = 0;
>       int err = 0;
>       block_t total_node_blocks = 0;
> +     block_t total_data_blocks = 0;
>  
>       do {
>               readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
> @@ -4488,6 +4489,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>                       seg_info_from_raw_sit(se, &sit);
>                       if (IS_NODESEG(se->type))
>                               total_node_blocks += se->valid_blocks;
> +                     else
> +                             total_data_blocks += se->valid_blocks;
>  
>                       if (f2fs_block_unit_discard(sbi)) {
>                               /* build discard map only one time */
> @@ -4529,6 +4532,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>               old_valid_blocks = se->valid_blocks;
>               if (IS_NODESEG(se->type))
>                       total_node_blocks -= old_valid_blocks;
> +             else
> +                     total_data_blocks -= old_valid_blocks;
>  
>               err = check_block_count(sbi, start, &sit);
>               if (err)
> @@ -4536,6 +4541,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>               seg_info_from_raw_sit(se, &sit);
>               if (IS_NODESEG(se->type))
>                       total_node_blocks += se->valid_blocks;
> +             else
> +                     total_data_blocks += se->valid_blocks;
>  
>               if (f2fs_block_unit_discard(sbi)) {
>                       if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
> @@ -4557,13 +4564,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
>       }
>       up_read(&curseg->journal_rwsem);
>  
> -     if (!err && total_node_blocks != valid_node_count(sbi)) {
> +     if (err)
> +             return err;
> +
> +     if (total_node_blocks != valid_node_count(sbi)) {
>               f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
>                        total_node_blocks, valid_node_count(sbi));
> -             err = -EFSCORRUPTED;
> +             return -EFSCORRUPTED;
>       }
>  
> -     return err;
> +     if (total_data_blocks + total_node_blocks !=
> +                             valid_user_blocks(sbi)) {
> +             f2fs_err(sbi, "SIT is corrupted data# %u vs %u",
> +                      total_data_blocks,
> +                      valid_user_blocks(sbi) - total_node_blocks);

This doesn't work, since blocks reserved as NEW_ADDR are included in
valid_block_count but are not counted in the SIT, so the two totals can
legitimately differ.

> +             return -EFSCORRUPTED;
> +     }
> +
> +     return 0;
>  }
>  
>  static void init_free_segmap(struct f2fs_sb_info *sbi)
> -- 
> 2.25.1


_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to