On 2018/1/4 18:01, Yunlei He wrote:
> Came across a dead loop in recovery like this:
>
> ......
> [ 24.680480s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597696
> [ 24.698394s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597697
> [ 24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724334s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724365s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724395s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> [ 24.724426s][pid:320,cpu0,init]find_fsync_dnodes: blkaddr =13597698
> ......
>
> Mount process will block in dead loop and fsck can do nothing with this
> error, This patch abandon recovery if node chain is cyclical.
>
> Signed-off-by: Yunlei He <[email protected]>
> ---
> fs/f2fs/recovery.c | 27 ++++++++++++++++++++++-----
> 1 file changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index cbeef73..318243c 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -221,11 +221,11 @@ static void recover_inode(struct inode *inode, struct
> page *page)
> }
>
> static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head
> *head,
> - bool check_only)
> + bool check_only, block_t *looped_blkaddr)
> {
> struct curseg_info *curseg;
> struct page *page = NULL;
> - block_t blkaddr;
> + block_t blkaddr, last_blkaddr;
> int err = 0;
>
> /* get node pages in the current segment */
> @@ -239,6 +239,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi,
> struct list_head *head,
> return 0;
>
> page = get_tmp_page(sbi, blkaddr);
> + if (PageChecked(page)) {
> + f2fs_msg(sbi->sb, KERN_ERR, "Abandon looped node block
> list");
> + *looped_blkaddr = last_blkaddr;
> + break;
> + }
> +
> + /*
> + * it's not needed to clear PG_checked flag in temp page since
> we
> + * will truncate all those pages in the end of recovery.
> + */
> + SetPageChecked(page);
All valid recoverable pages have been tagged with PG_checked flag, so in
recover_data() we can only recover those tagged pages like:
if (!is_recoverable_dnode(page) || !PageChecked(page))
break;
/* do recovery */
....
ClearPageChecked(page);
So the process can be more clear, right?
Thanks,
>
> if ()
> break;
> @@ -279,6 +290,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi,
> struct list_head *head,
> entry->last_dentry = blkaddr;
> next:
> /* check next segment */
> + last_blkaddr = blkaddr;
> blkaddr = next_blkaddr_of_node(page);
> f2fs_put_page(page, 1);
>
> @@ -525,7 +537,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi,
> struct inode *inode,
> }
>
> static int recover_data(struct f2fs_sb_info *sbi, struct list_head
> *inode_list,
> - struct list_head *dir_list)
> + struct list_head *dir_list, block_t
> looped_blkaddr)
> {
> struct curseg_info *curseg;
> struct page *page = NULL;
> @@ -577,6 +589,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct
> list_head *inode_list,
> if (entry->blkaddr == blkaddr)
> del_fsync_inode(entry);
> next:
> + /* in case of looped node chain */
> + if (blkaddr == looped_blkaddr)
> + break;
> +
> /* check next segment */
> blkaddr = next_blkaddr_of_node(page);
> f2fs_put_page(page, 1);
> @@ -596,6 +612,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool
> check_only)
> bool need_writecp = false;
> #ifdef CONFIG_QUOTA
> int quota_enabled;
> + block_t looped_blkaddr = 0;
> #endif
>
> if (s_flags & SB_RDONLY) {
> @@ -624,7 +641,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool
> check_only)
> mutex_lock(&sbi->cp_mutex);
>
> /* step #1: find fsynced inode numbers */
> - err = find_fsync_dnodes(sbi, &inode_list, check_only);
> + err = find_fsync_dnodes(sbi, &inode_list, check_only, &looped_blkaddr);
> if (err || list_empty(&inode_list))
> goto skip;
>
> @@ -636,7 +653,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool
> check_only)
> need_writecp = true;
>
> /* step #2: recover data */
> - err = recover_data(sbi, &inode_list, &dir_list);
> + err = recover_data(sbi, &inode_list, &dir_list, looped_blkaddr);
> if (!err)
> f2fs_bug_on(sbi, !list_empty(&inode_list));
> skip:
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel