On 2018/1/4 18:01, Yunlei He wrote:
> Came across a dead loop in recovery like this:
> 
> ......
> [   24.680480s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597696
> [   24.698394s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597697
> [   24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [   24.724426s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> ......
> 
> Mount process will block in dead loop and fsck can do nothing with this
> error, This patch abandon recovery if node chain is cyclical.
> 
> Signed-off-by: Yunlei He <[email protected]>
> ---
>  fs/f2fs/recovery.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index cbeef73..318243c 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -221,11 +221,11 @@ static void recover_inode(struct inode *inode, struct 
> page *page)
>  }
>  
>  static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head 
> *head,
> -                             bool check_only)
> +                     bool check_only, block_t *looped_blkaddr)
>  {
>       struct curseg_info *curseg;
>       struct page *page = NULL;
> -     block_t blkaddr;
> +     block_t blkaddr, last_blkaddr;
>       int err = 0;
>  
>       /* get node pages in the current segment */
> @@ -239,6 +239,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, 
> struct list_head *head,
>                       return 0;
>  
>               page = get_tmp_page(sbi, blkaddr);
> +             if (PageChecked(page)) {
> +                     f2fs_msg(sbi->sb, KERN_ERR, "Abandon looped node block 
> list");
> +                     *looped_blkaddr = last_blkaddr;
> +                     break;
> +             }
> +
> +             /*
> +              * it's not needed to clear PG_checked flag in temp page since 
> we
> +              * will truncate all those pages in the end of recovery.
> +              */
> +             SetPageChecked(page);

All valid recoverable pages have been tagged with PG_checked flag, so in
recover_data() we can only recover those tagged pages like:


if (!is_recoverable_dnode(page) || !PageChecked(page))
        break;

/* do recovery */
....

ClearPageChecked(page);

So the process can be more clear, right?

Thanks,

>  
>               if ()
>                       break;
> @@ -279,6 +290,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, 
> struct list_head *head,
>                       entry->last_dentry = blkaddr;
>  next:
>               /* check next segment */
> +             last_blkaddr = blkaddr;
>               blkaddr = next_blkaddr_of_node(page);
>               f2fs_put_page(page, 1);
>  
> @@ -525,7 +537,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, 
> struct inode *inode,
>  }
>  
>  static int recover_data(struct f2fs_sb_info *sbi, struct list_head 
> *inode_list,
> -                                             struct list_head *dir_list)
> +                             struct list_head *dir_list, block_t 
> looped_blkaddr)
>  {
>       struct curseg_info *curseg;
>       struct page *page = NULL;
> @@ -577,6 +589,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct 
> list_head *inode_list,
>               if (entry->blkaddr == blkaddr)
>                       del_fsync_inode(entry);
>  next:
> +             /* in case of looped node chain */
> +             if (blkaddr == looped_blkaddr)
> +                     break;
> +
>               /* check next segment */
>               blkaddr = next_blkaddr_of_node(page);
>               f2fs_put_page(page, 1);
> @@ -596,6 +612,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
> check_only)
>       bool need_writecp = false;
>  #ifdef CONFIG_QUOTA
>       int quota_enabled;
> +     block_t looped_blkaddr = 0;
>  #endif
>  
>       if (s_flags & SB_RDONLY) {
> @@ -624,7 +641,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
> check_only)
>       mutex_lock(&sbi->cp_mutex);
>  
>       /* step #1: find fsynced inode numbers */
> -     err = find_fsync_dnodes(sbi, &inode_list, check_only);
> +     err = find_fsync_dnodes(sbi, &inode_list, check_only, &looped_blkaddr);
>       if (err || list_empty(&inode_list))
>               goto skip;
>  
> @@ -636,7 +653,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool 
> check_only)
>       need_writecp = true;
>  
>       /* step #2: recover data */
> -     err = recover_data(sbi, &inode_list, &dir_list);
> +     err = recover_data(sbi, &inode_list, &dir_list, looped_blkaddr);
>       if (!err)
>               f2fs_bug_on(sbi, !list_empty(&inode_list));
>  skip:
> 


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to