On 2016/9/20 10:41, Jaegeuk Kim wrote: > On Tue, Sep 20, 2016 at 09:47:20AM +0800, Chao Yu wrote: >> Hi Jaegeuk, >> >> On 2016/9/20 5:49, Jaegeuk Kim wrote: >>> Hi Chao, >>> >>> On Sun, Sep 18, 2016 at 11:30:06PM +0800, Chao Yu wrote: >>>> From: Chao Yu <yuch...@huawei.com> >>>> >>>> This patch introduces spinlock to protect updating process of ckpt_flags >>>> field in struct f2fs_checkpoint, it avoids incorrectly updating in race >>>> condition. >>> >>> So, I'm seeing a race condition between f2fs_stop_checkpoint(), >>> write_checkpoint(), and f2fs_fill_super(). >>> >>> How about just adding f2fs_lock_op() in f2fs_stop_checkpoint()? >> >> I'm afraid there will be a potential deadlock here: >> >> Thread A Thread B >> - write_checkpoint >> - block_operations >> - f2fs_lock_all >> - do_checkpoint >> - wait_on_all_pages_writeback >> - f2fs_write_end_io >> - f2fs_stop_checkpoint >> - f2fs_lock_op >> - end_page_writeback >> - atomic_dec_and_test >> - wake_up >> >> Right? > > Okay, I see. Let me try to understand in more details. > Basically, there'd be no problem if there is no f2fs_stop_checkpoint(), since > every {set|clear}_ckpt_flags() are called under f2fs_lock_op() or in > fill_super(). And, you're probably concerned about any breakage of ckpt->flags > due to accidental f2fs_stop_checkpoint() trigger. So, we're able to lose > ERROR_FLAG because of data race. > > Oh, I found one potential corruption case in f2fs_write_checkpoint(). > Before writing the last checkpoint block, we used to check its IO error. > But, if set_ckpt_flags() and f2fs_stop_checkpoint() were called concurrently, > ckpt_flags was able to lose ERROR_FLAG, resulting in finalizing its checkpoint > pack. So, we can get valid checkpoint pack with EIO'ed metadata block.
That's right. > > BTW, we can do multiple set/clear flags in do_checkpoint() with single > spin_lock > call, tho. Agree, let me refactor the code. :) Thanks, > > Thanks, > >>> >>>> Signed-off-by: Chao Yu <yuch...@huawei.com> >>>> --- >>>> fs/f2fs/checkpoint.c | 24 ++++++++++++------------ >>>> fs/f2fs/f2fs.h | 29 +++++++++++++++++++++-------- >>>> fs/f2fs/recovery.c | 2 +- >>>> fs/f2fs/segment.c | 4 ++-- >>>> fs/f2fs/super.c | 5 +++-- >>>> 5 files changed, 39 insertions(+), 25 deletions(-) >>>> >>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c >>>> index df56a43..0338f8c 100644 >>>> --- a/fs/f2fs/checkpoint.c >>>> +++ b/fs/f2fs/checkpoint.c >>>> @@ -28,7 +28,7 @@ struct kmem_cache *inode_entry_slab; >>>> >>>> void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) >>>> { >>>> - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); >>>> + set_ckpt_flags(sbi, CP_ERROR_FLAG); >>>> sbi->sb->s_flags |= MS_RDONLY; >>>> if (!end_io) >>>> f2fs_flush_merged_bios(sbi); >>>> @@ -571,7 +571,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) >>>> block_t start_blk, orphan_blocks, i, j; >>>> int err; >>>> >>>> - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) >>>> + if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) >>>> return 0; >>>> >>>> start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); >>>> @@ -595,7 +595,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) >>>> f2fs_put_page(page, 1); >>>> } >>>> /* clear Orphan Flag */ >>>> - clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); >>>> + clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); >>>> return 0; >>>> } >>>> >>>> @@ -1054,9 +1054,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, >>>> struct cp_control *cpc) >>>> /* 2 cp + n data seg summary + orphan inode blocks */ >>>> data_sum_blocks = npages_for_summary_flush(sbi, false); >>>> if (data_sum_blocks < NR_CURSEG_DATA_TYPE) >>>> - set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); >>>> + set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG); >>>> else >>>> - clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); >>>> + clear_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG); >>>> >>>> orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); >>>> ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + >>>> @@ -1072,22 +1072,22 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, >>>> struct cp_control *cpc) >>>> orphan_blocks); >>>> >>>> if (cpc->reason == CP_UMOUNT) >>>> - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); >>>> + set_ckpt_flags(sbi, CP_UMOUNT_FLAG); >>>> else >>>> - clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); >>>> + clear_ckpt_flags(sbi, CP_UMOUNT_FLAG); >>>> >>>> if (cpc->reason == CP_FASTBOOT) >>>> - set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); >>>> + set_ckpt_flags(sbi, CP_FASTBOOT_FLAG); >>>> else >>>> - clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); >>>> + clear_ckpt_flags(sbi, CP_FASTBOOT_FLAG); >>>> >>>> if (orphan_num) >>>> - set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); >>>> + set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); >>>> else >>>> - clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); >>>> + clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); >>>> >>>> if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) >>>> - set_ckpt_flags(ckpt, CP_FSCK_FLAG); >>>> + set_ckpt_flags(sbi, CP_FSCK_FLAG); >>>> >>>> /* update SIT/NAT bitmap */ >>>> get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>>> index c30f744b..b615f3f 100644 >>>> --- a/fs/f2fs/f2fs.h >>>> +++ b/fs/f2fs/f2fs.h >>>> @@ -814,6 +814,7 @@ struct f2fs_sb_info { >>>> >>>> /* for checkpoint */ >>>> struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ >>>> + spinlock_t cp_lock; /* for flag in ckpt */ >>>> struct inode *meta_inode; /* cache meta blocks */ >>>> struct mutex cp_mutex; /* checkpoint procedure lock */ >>>> struct rw_semaphore cp_rwsem; /* blocking FS operations */ >>>> @@ -1083,24 +1084,36 @@ static inline unsigned long long >>>> cur_cp_version(struct f2fs_checkpoint *cp) >>>> return le64_to_cpu(cp->checkpoint_ver); >>>> } >>>> >>>> -static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned >>>> int f) >>>> +static inline bool is_set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned >>>> int f) >>>> { >>>> + struct f2fs_checkpoint *cp = F2FS_CKPT(sbi); >>>> unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); >>>> + >>>> return ckpt_flags & f; >>>> } >>>> >>>> -static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned >>>> int f) >>>> +static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int >>>> f) >>>> { >>>> - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); >>>> + struct f2fs_checkpoint *cp = F2FS_CKPT(sbi); >>>> + unsigned int ckpt_flags; >>>> + >>>> + spin_lock(&sbi->cp_lock); >>>> + ckpt_flags = le32_to_cpu(cp->ckpt_flags); >>>> ckpt_flags |= f; >>>> cp->ckpt_flags = cpu_to_le32(ckpt_flags); >>>> + spin_unlock(&sbi->cp_lock); >>>> } >>>> >>>> -static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned >>>> int f) >>>> +static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned >>>> int f) >>>> { >>>> - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); >>>> + struct f2fs_checkpoint *cp = F2FS_CKPT(sbi); >>>> + unsigned int ckpt_flags; >>>> + >>>> + spin_lock(&sbi->cp_lock); >>>> + ckpt_flags = le32_to_cpu(cp->ckpt_flags); >>>> ckpt_flags &= (~f); >>>> cp->ckpt_flags = cpu_to_le32(ckpt_flags); >>>> + spin_unlock(&sbi->cp_lock); >>>> } >>>> >>>> static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) >>>> @@ -1148,8 +1161,8 @@ static inline bool __remain_node_summaries(int >>>> reason) >>>> >>>> static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) >>>> { >>>> - return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || >>>> - is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); >>>> + return (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG) || >>>> + is_set_ckpt_flags(sbi, CP_FASTBOOT_FLAG)); >>>> } >>>> >>>> /* >>>> @@ -1851,7 +1864,7 @@ static inline int f2fs_readonly(struct super_block >>>> *sb) >>>> >>>> static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) >>>> { >>>> - return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); >>>> + return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); >>>> } >>>> >>>> static inline bool is_dot_dotdot(const struct qstr *str) >>>> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c >>>> index ad748e5..37d99d2 100644 >>>> --- a/fs/f2fs/recovery.c >>>> +++ b/fs/f2fs/recovery.c >>>> @@ -649,7 +649,7 @@ out: >>>> invalidate_mapping_pages(META_MAPPING(sbi), >>>> blkaddr, blkaddr); >>>> >>>> - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); >>>> + set_ckpt_flags(sbi, CP_ERROR_FLAG); >>>> mutex_unlock(&sbi->cp_mutex); >>>> } else if (need_writecp) { >>>> struct cp_control cpc = { >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>> index 101b58f..dc68f30 100644 >>>> --- a/fs/f2fs/segment.c >>>> +++ b/fs/f2fs/segment.c >>>> @@ -1825,7 +1825,7 @@ static int restore_curseg_summaries(struct >>>> f2fs_sb_info *sbi) >>>> int type = CURSEG_HOT_DATA; >>>> int err; >>>> >>>> - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { >>>> + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { >>>> int npages = npages_for_summary_flush(sbi, true); >>>> >>>> if (npages >= 2) >>>> @@ -1922,7 +1922,7 @@ static void write_normal_summaries(struct >>>> f2fs_sb_info *sbi, >>>> >>>> void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) >>>> { >>>> - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) >>>> + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) >>>> write_compacted_summaries(sbi, start_blk); >>>> else >>>> write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c >>>> index 555217f..f809729 100644 >>>> --- a/fs/f2fs/super.c >>>> +++ b/fs/f2fs/super.c >>>> @@ -735,7 +735,7 @@ static void f2fs_put_super(struct super_block *sb) >>>> * clean checkpoint again. >>>> */ >>>> if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || >>>> - !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { >>>> + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { >>>> struct cp_control cpc = { >>>> .reason = CP_UMOUNT, >>>> }; >>>> @@ -1477,6 +1477,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) >>>> mutex_init(&sbi->umount_mutex); >>>> mutex_init(&sbi->wio_mutex[NODE]); >>>> mutex_init(&sbi->wio_mutex[DATA]); >>>> + spin_lock_init(&sbi->cp_lock); >>>> >>>> #ifdef CONFIG_F2FS_FS_ENCRYPTION >>>> memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, >>>> @@ -1818,7 +1819,7 @@ try_onemore: >>>> * previous checkpoint was not done by clean system shutdown. >>>> */ >>>> if (bdev_read_only(sb->s_bdev) && >>>> - !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { >>>> + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { >>>> err = -EROFS; >>>> goto free_kobj; >>>> } >>>> -- >>>> 2.7.2 >>> >>> . >>> > > . >