>-----Original Message-----
>From: Yuchao (T)
>Sent: Friday, January 05, 2018 9:17 AM
>To: heyunlei; [email protected]; [email protected]
>Cc: Wangbintian; Jianing (Euler)
>Subject: Re: [f2fs-dev][PATCH v6] f2fs: avoid dead loop in function 
>find_fsync_dnodes
>
>On 2018/1/4 18:01, Yunlei He wrote:
>> Came across a dead loop in recovery like this:
>>
>> ......
>> [   24.680480s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597696
>> [   24.698394s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597697
>> [   24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> [   24.724426s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
>> ......
>>
>> Mount process will block in dead loop and fsck can do nothing with this
>> error, This patch abandon recovery if node chain is cyclical.
>>
>> Signed-off-by: Yunlei He <[email protected]>
>> ---
>>  fs/f2fs/recovery.c | 27 ++++++++++++++++++++++-----
>>  1 file changed, 22 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>> index cbeef73..318243c 100644
>> --- a/fs/f2fs/recovery.c
>> +++ b/fs/f2fs/recovery.c
>> @@ -221,11 +221,11 @@ static void recover_inode(struct inode *inode, struct 
>> page *page)
>>  }
>>
>>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head 
>> *head,
>> -                            bool check_only)
>> +                    bool check_only, block_t *looped_blkaddr)
>>  {
>>      struct curseg_info *curseg;
>>      struct page *page = NULL;
>> -    block_t blkaddr;
>> +    block_t blkaddr, last_blkaddr;
>>      int err = 0;
>>
>>      /* get node pages in the current segment */
>> @@ -239,6 +239,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, 
>> struct list_head *head,
>>                      return 0;
>>
>>              page = get_tmp_page(sbi, blkaddr);
>> +            if (PageChecked(page)) {
>> +                    f2fs_msg(sbi->sb, KERN_ERR, "Abandon looped node block 
>> list");
>> +                    *looped_blkaddr = last_blkaddr;
>> +                    break;
>> +            }
>> +
>> +            /*
>> +             * it's not needed to clear PG_checked flag in temp page since 
>> we
>> +             * will truncate all those pages in the end of recovery.
>> +             */
>> +            SetPageChecked(page);
>
>All valid recoverable pages have been tagged with PG_checked flag, so in
>recover_data() we can only recover those tagged pages like:
>
>
>if (!is_recoverable_dnode(page) || !PageChecked(page))
>       break;
>
>/* do recovery */
>....
>
>ClearPageChecked(page);
>
>So the process can be more clear, right?
>
>Thanks,

Could these pages tagged with PG_checked flag been reclaimed in some extreme 
scenarios?

Thanks.

>
>>
>>              if ()
>>                      break;
>> @@ -279,6 +290,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, 
>> struct list_head *head,
>>                      entry->last_dentry = blkaddr;
>>  next:
>>              /* check next segment */
>> +            last_blkaddr = blkaddr;
>>              blkaddr = next_blkaddr_of_node(page);
>>              f2fs_put_page(page, 1);
>>
>> @@ -525,7 +537,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, 
>> struct inode *inode,
>>  }
>>
>>  static int recover_data(struct f2fs_sb_info *sbi, struct list_head 
>> *inode_list,
>> -                                            struct list_head *dir_list)
>> +                            struct list_head *dir_list, block_t 
>> looped_blkaddr)
>>  {
>>      struct curseg_info *curseg;
>>      struct page *page = NULL;
>> @@ -577,6 +589,10 @@ static int recover_data(struct f2fs_sb_info *sbi, 
>> struct list_head *inode_list,
>>              if (entry->blkaddr == blkaddr)
>>                      del_fsync_inode(entry);
>>  next:
>> +            /* in case of looped node chain */
>> +            if (blkaddr == looped_blkaddr)
>> +                    break;
>> +
>>              /* check next segment */
>>              blkaddr = next_blkaddr_of_node(page);
>>              f2fs_put_page(page, 1);
>> @@ -596,6 +612,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
>> check_only)
>>      bool need_writecp = false;
>>  #ifdef CONFIG_QUOTA
>>      int quota_enabled;
>> +    block_t looped_blkaddr = 0;
>>  #endif
>>
>>      if (s_flags & SB_RDONLY) {
>> @@ -624,7 +641,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
>> check_only)
>>      mutex_lock(&sbi->cp_mutex);
>>
>>      /* step #1: find fsynced inode numbers */
>> -    err = find_fsync_dnodes(sbi, &inode_list, check_only);
>> +    err = find_fsync_dnodes(sbi, &inode_list, check_only, &looped_blkaddr);
>>      if (err || list_empty(&inode_list))
>>              goto skip;
>>
>> @@ -636,7 +653,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
>> check_only)
>>      need_writecp = true;
>>
>>      /* step #2: recover data */
>> -    err = recover_data(sbi, &inode_list, &dir_list);
>> +    err = recover_data(sbi, &inode_list, &dir_list, looped_blkaddr);
>>      if (!err)
>>              f2fs_bug_on(sbi, !list_empty(&inode_list));
>>  skip:
>>


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to