On 2018/5/5 2:59, Jaegeuk Kim wrote: > On 05/02, Chao Yu wrote: >> On 2018/4/28 10:36, Jaegeuk Kim wrote: >>> On 04/27, Chao Yu wrote: >>>> On 2018/4/27 0:03, Jaegeuk Kim wrote: >>>>> On 04/24, Chao Yu wrote: >>>>>> Thread A Thread B Thread C >>>>>> - f2fs_remount >>>>>> - stop_gc_thread >>>>>> - f2fs_sbi_store >>>>>> - issue_discard_thread >>>>>> sbi->gc_thread = NULL; >>>>>> sbi->gc_thread->gc_wake = 1 >>>>>> access >>>>>> sbi->gc_thread->gc_urgent >>>>>> >>>>>> Previously, we allocate memory for sbi->gc_thread based on background >>>>>> gc thread mount option, the memory can be released if we turn off >>>>>> that mount option, but still there are several places access gc_thread >>>>>> pointer without considering race condition, result in NULL point >>>>>> dereference. >>>>> >>>>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and >>>> >>>> Better, >>>> >>>>> issue_discard_thread? >>>> >>>> There is more cases can be called from different scenarios: >>>> - select_policy() >>>> - need_SSR() >>> >>> No? They should be blocked by remount(2). >> >> How about below cases? >> >> - do_remount > > Was there no guard to block any filesystem operations?
The only block point is f2fs_readonly in f2fs_sync_file, without holding s_umount during ->fsync, so IMO, we need to cover need_SSR & select_gc_type, let me update the patch later. Thanks, > If it's true, yeah, we need to cover them. > >> - do_remount_sb >> - remount_fs >> - f2fs_remount >> - stop_gc_thread >> - fsync >> - f2fs_sync_file >> - file_write_and_wait_range >> - f2fs_write_data_pages >> - __write_data_page >> - should_update_inplace >> - check_inplace_update_policy >> - need_SSR >> : sbi->gc_thread = NULL; >> >> or >> >> - write_data_page >> - allocate_data_block >> - allocate_segment >> - get_ssr_segment >> - select_gc_type >> : sbi->gc_thread = NULL; >> >> Thanks, >> >>> >>>> >>>> Thanks, >>>> >>>>> >>>>>> >>>>>> In order to fix this issue, keep gc_thread structure valid in sbi all >>>>>> the time instead of alloc/free it dynamically. >>>>>> >>>>>> In addition, add a rw semaphore to exclude rw operation in fields of >>>>>> gc_thread. >>>>>> >>>>>> Signed-off-by: Chao Yu <yuch...@huawei.com> >>>>>> --- >>>>>> v2: >>>>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk. >>>>>> fs/f2fs/debug.c | 3 +-- >>>>>> fs/f2fs/f2fs.h | 9 ++++++- >>>>>> fs/f2fs/gc.c | 78 >>>>>> ++++++++++++++++++++++++++++++++++++------------------- >>>>>> fs/f2fs/gc.h | 1 + >>>>>> fs/f2fs/segment.c | 10 +++++-- >>>>>> fs/f2fs/super.c | 26 +++++++++++++------ >>>>>> fs/f2fs/sysfs.c | 26 ++++++++++++++----- >>>>>> 7 files changed, 107 insertions(+), 46 deletions(-) >>>>>> >>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c >>>>>> index a66107b5cfff..0fbd674c66fb 100644 >>>>>> --- a/fs/f2fs/debug.c >>>>>> +++ b/fs/f2fs/debug.c >>>>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) >>>>>> si->cache_mem = 0; >>>>>> >>>>>> /* build gc */ >>>>>> - if (sbi->gc_thread) >>>>>> - si->cache_mem += sizeof(struct f2fs_gc_kthread); >>>>>> + si->cache_mem += sizeof(struct f2fs_gc_kthread); >>>>>> >>>>>> /* build merge flush thread */ >>>>>> if (SM_I(sbi)->fcc_info) >>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>>>>> index 8f3ad9662d13..75d3b4875429 100644 >>>>>> --- a/fs/f2fs/f2fs.h >>>>>> +++ b/fs/f2fs/f2fs.h >>>>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct >>>>>> f2fs_sb_info *sbi) >>>>>> return (struct sit_info *)(SM_I(sbi)->sit_info); >>>>>> } >>>>>> >>>>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi) >>>>>> +{ >>>>>> + return (struct f2fs_gc_kthread *)(sbi->gc_thread); >>>>>> +} >>>>>> + >>>>>> static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) >>>>>> { >>>>>> return (struct free_segmap_info *)(SM_I(sbi)->free_info); >>>>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, >>>>>> loff_t pos, size_t len); >>>>>> /* >>>>>> * gc.c >>>>>> */ >>>>>> +int init_gc_context(struct f2fs_sb_info *sbi); >>>>>> +void destroy_gc_context(struct f2fs_sb_info * sbi); >>>>>> int start_gc_thread(struct f2fs_sb_info *sbi); >>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi); >>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi); >>>>>> block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); >>>>>> int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, >>>>>> unsigned int segno); >>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c >>>>>> index 70418b34c5f6..2febb84b2fd6 100644 >>>>>> --- a/fs/f2fs/gc.c >>>>>> +++ b/fs/f2fs/gc.c >>>>>> @@ -26,8 +26,8 @@ >>>>>> static int gc_thread_func(void *data) >>>>>> { >>>>>> struct f2fs_sb_info *sbi = data; >>>>>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; >>>>>> - wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; >>>>>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); >>>>>> + wait_queue_head_t *wq = &gc_th->gc_wait_queue_head; >>>>>> unsigned int wait_ms; >>>>>> >>>>>> wait_ms = gc_th->min_sleep_time; >>>>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data) >>>>>> return 0; >>>>>> } >>>>>> >>>>>> -int start_gc_thread(struct f2fs_sb_info *sbi) >>>>>> +int init_gc_context(struct f2fs_sb_info *sbi) >>>>>> { >>>>>> struct f2fs_gc_kthread *gc_th; >>>>>> - dev_t dev = sbi->sb->s_bdev->bd_dev; >>>>>> - int err = 0; >>>>>> >>>>>> gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), >>>>>> GFP_KERNEL); >>>>>> - if (!gc_th) { >>>>>> - err = -ENOMEM; >>>>>> - goto out; >>>>>> - } >>>>>> + if (!gc_th) >>>>>> + return -ENOMEM; >>>>>> + >>>>>> + gc_th->f2fs_gc_task = NULL; >>>>>> >>>>>> gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; >>>>>> gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; >>>>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi) >>>>>> gc_th->gc_urgent = 0; >>>>>> gc_th->gc_wake= 0; >>>>>> >>>>>> + init_rwsem(&gc_th->gc_rwsem); >>>>>> + >>>>>> sbi->gc_thread = gc_th; >>>>>> - init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); >>>>>> - sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +void destroy_gc_context(struct f2fs_sb_info *sbi) >>>>>> +{ >>>>>> + kfree(GC_I(sbi)); >>>>>> + sbi->gc_thread = NULL; >>>>>> +} >>>>>> + >>>>>> +int start_gc_thread(struct f2fs_sb_info *sbi) >>>>>> +{ >>>>>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); >>>>>> + dev_t dev = sbi->sb->s_bdev->bd_dev; >>>>>> + int err = 0; >>>>>> + >>>>>> + init_waitqueue_head(&gc_th->gc_wait_queue_head); >>>>>> + gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi, >>>>>> "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); >>>>>> if (IS_ERR(gc_th->f2fs_gc_task)) { >>>>>> err = PTR_ERR(gc_th->f2fs_gc_task); >>>>>> - kfree(gc_th); >>>>>> - sbi->gc_thread = NULL; >>>>>> + gc_th->f2fs_gc_task = NULL; >>>>>> } >>>>>> -out: >>>>>> + >>>>>> return err; >>>>>> } >>>>>> >>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi) >>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi) >>>>>> { >>>>>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; >>>>>> - if (!gc_th) >>>>>> - return; >>>>>> - kthread_stop(gc_th->f2fs_gc_task); >>>>>> - kfree(gc_th); >>>>>> - sbi->gc_thread = NULL; >>>>>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi); >>>>>> + bool stopped = false; >>>>>> + >>>>>> + down_write(&gc_th->gc_rwsem); >>>>>> + if (gc_th->f2fs_gc_task) { >>>>>> + kthread_stop(gc_th->f2fs_gc_task); >>>>>> + gc_th->f2fs_gc_task = NULL; >>>>>> + stopped = true; >>>>>> + } >>>>>> + up_write(&gc_th->gc_rwsem); >>>>>> + >>>>>> + return stopped; >>>>>> } >>>>>> >>>>>> static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) >>>>>> { >>>>>> int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; >>>>>> >>>>>> - if (!gc_th) >>>>>> - return gc_mode; >>>>>> + down_read(&gc_th->gc_rwsem); >>>>>> + if (!gc_th->f2fs_gc_task) >>>>>> + goto out; >>>>>> >>>>>> if (gc_th->gc_idle) { >>>>>> if (gc_th->gc_idle == 1) >>>>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread >>>>>> *gc_th, int gc_type) >>>>>> } >>>>>> if (gc_th->gc_urgent) >>>>>> gc_mode = GC_GREEDY; >>>>>> +out: >>>>>> + up_read(&gc_th->gc_rwsem); >>>>>> return gc_mode; >>>>>> } >>>>>> >>>>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info >>>>>> *sbi, int gc_type, >>>>>> p->max_search = dirty_i->nr_dirty[type]; >>>>>> p->ofs_unit = 1; >>>>>> } else { >>>>>> - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); >>>>>> + p->gc_mode = select_gc_type(GC_I(sbi), gc_type); >>>>>> p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; >>>>>> p->max_search = dirty_i->nr_dirty[DIRTY]; >>>>>> p->ofs_unit = sbi->segs_per_sec; >>>>>> } >>>>>> >>>>>> /* we need to check every dirty segments in the FG_GC case */ >>>>>> - if (gc_type != FG_GC && >>>>>> - (sbi->gc_thread && !sbi->gc_thread->gc_urgent) >>>>>> && >>>>>> + down_read(&GC_I(sbi)->gc_rwsem); >>>>>> + if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task && >>>>>> + !GC_I(sbi)->gc_urgent && >>>>>> p->max_search > sbi->max_victim_search) >>>>>> p->max_search = sbi->max_victim_search; >>>>>> + up_read(&GC_I(sbi)->gc_rwsem); >>>>>> >>>>>> /* let's select beginning hot/small space first in no_heap >>>>>> mode*/ >>>>>> if (test_opt(sbi, NOHEAP) && >>>>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h >>>>>> index b0045d4c8d1e..9a5b273328c2 100644 >>>>>> --- a/fs/f2fs/gc.h >>>>>> +++ b/fs/f2fs/gc.h >>>>>> @@ -26,6 +26,7 @@ >>>>>> #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ >>>>>> >>>>>> struct f2fs_gc_kthread { >>>>>> + struct rw_semaphore gc_rwsem; /* semaphore for >>>>>> f2fs_gc_task */ >>>>>> struct task_struct *f2fs_gc_task; >>>>>> wait_queue_head_t gc_wait_queue_head; >>>>>> >>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>>>> index 5960768d26df..f787eea1b2f6 100644 >>>>>> --- a/fs/f2fs/segment.c >>>>>> +++ b/fs/f2fs/segment.c >>>>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi) >>>>>> >>>>>> if (test_opt(sbi, LFS)) >>>>>> return false; >>>>>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) >>>>>> + down_read(&GC_I(sbi)->gc_rwsem); >>>>>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) { >>>>>> + down_read(&GC_I(sbi)->gc_rwsem); >>>>>> return true; >>>>>> + } >>>>>> + up_read(&GC_I(sbi)->gc_rwsem); >>>>>> >>>>>> return free_sections(sbi) <= (node_secs + 2 * dent_secs + >>>>>> imeta_secs + >>>>>> SM_I(sbi)->min_ssr_sections + >>>>>> reserved_sections(sbi)); >>>>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data) >>>>>> if (dcc->discard_wake) >>>>>> dcc->discard_wake = 0; >>>>>> >>>>>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) >>>>>> + down_read(&GC_I(sbi)->gc_rwsem); >>>>>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) >>>>>> init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); >>>>>> + up_read(&GC_I(sbi)->gc_rwsem); >>>>>> >>>>>> sb_start_intwrite(sbi->sb); >>>>>> >>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c >>>>>> index dedb8e50b440..199f29dce86d 100644 >>>>>> --- a/fs/f2fs/super.c >>>>>> +++ b/fs/f2fs/super.c >>>>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb) >>>>>> kfree(sbi->raw_super); >>>>>> >>>>>> destroy_device_list(sbi); >>>>>> + destroy_gc_context(sbi); >>>>>> mempool_destroy(sbi->write_io_dummy); >>>>>> #ifdef CONFIG_QUOTA >>>>>> for (i = 0; i < MAXQUOTAS; i++) >>>>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, >>>>>> int *flags, char *data) >>>>>> * option. Also sync the filesystem. >>>>>> */ >>>>>> if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { >>>>>> - if (sbi->gc_thread) { >>>>>> - stop_gc_thread(sbi); >>>>>> - need_restart_gc = true; >>>>>> + need_restart_gc = stop_gc_thread(sbi); >>>>>> + } else { >>>>>> + down_write(&GC_I(sbi)->gc_rwsem); >>>>>> + if (!GC_I(sbi)->f2fs_gc_task) { >>>>>> + err = start_gc_thread(sbi); >>>>>> + if (err) { >>>>>> + up_write(&GC_I(sbi)->gc_rwsem); >>>>>> + goto restore_opts; >>>>>> + } >>>>>> + need_stop_gc = true; >>>>>> } >>>>>> - } else if (!sbi->gc_thread) { >>>>>> - err = start_gc_thread(sbi); >>>>>> - if (err) >>>>>> - goto restore_opts; >>>>>> - need_stop_gc = true; >>>>>> + up_write(&GC_I(sbi)->gc_rwsem); >>>>>> } >>>>>> >>>>>> if (*flags & SB_RDONLY || >>>>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block >>>>>> *sb, void *data, int silent) >>>>>> goto free_meta_inode; >>>>>> } >>>>>> >>>>>> + err = init_gc_context(sbi); >>>>>> + if (err) >>>>>> + goto free_checkpoint; >>>>>> + >>>>>> /* Initialize device list */ >>>>>> err = f2fs_scan_devices(sbi); >>>>>> if (err) { >>>>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, >>>>>> void *data, int silent) >>>>>> destroy_segment_manager(sbi); >>>>>> free_devices: >>>>>> destroy_device_list(sbi); >>>>>> + destroy_gc_context(sbi); >>>>>> +free_checkpoint: >>>>>> kfree(sbi->ckpt); >>>>>> free_meta_inode: >>>>>> make_bad_inode(sbi->meta_inode); >>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c >>>>>> index 2c53de9251be..b8d09935e43f 100644 >>>>>> --- a/fs/f2fs/sysfs.c >>>>>> +++ b/fs/f2fs/sysfs.c >>>>>> @@ -46,7 +46,7 @@ struct f2fs_attr { >>>>>> static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int >>>>>> struct_type) >>>>>> { >>>>>> if (struct_type == GC_THREAD) >>>>>> - return (unsigned char *)sbi->gc_thread; >>>>>> + return (unsigned char *)GC_I(sbi); >>>>>> else if (struct_type == SM_INFO) >>>>>> return (unsigned char *)SM_I(sbi); >>>>>> else if (struct_type == DCC_INFO) >>>>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, >>>>>> if (!strcmp(a->attr.name, "trim_sections")) >>>>>> return -EINVAL; >>>>>> >>>>>> - *ui = t; >>>>>> - >>>>>> - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) >>>>>> + if (!strcmp(a->attr.name, "iostat_enable") && t == 0) >>>>>> f2fs_reset_iostat(sbi); >>>>>> - if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && >>>>>> sbi->gc_thread) { >>>>>> - sbi->gc_thread->gc_wake = 1; >>>>>> - >>>>>> wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); >>>>>> + >>>>>> + if (a->struct_type == GC_THREAD) { >>>>>> + down_write(&GC_I(sbi)->gc_rwsem); >>>>>> + if (!GC_I(sbi)->f2fs_gc_task) { >>>>>> + up_write(&GC_I(sbi)->gc_rwsem); >>>>>> + return -EINVAL; >>>>>> + } >>>>>> + } >>>>>> + >>>>>> + if (!strcmp(a->attr.name, "gc_urgent") && t == 1) { >>>>>> + GC_I(sbi)->gc_wake = 1; >>>>>> + >>>>>> wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head); >>>>>> wake_up_discard_thread(sbi, true); >>>>>> } >>>>>> >>>>>> + *ui = t; >>>>>> + >>>>>> + if (a->struct_type == GC_THREAD) >>>>>> + up_write(&GC_I(sbi)->gc_rwsem); >>>>>> + >>>>>> return count; >>>>>> } >>>>>> >>>>>> -- >>>>>> 2.15.0.55.gc2ece9dc4de6 >>>>> >>>>> . >>>>> >>> >>> . >>> > > . >