f2fs_stop_gc_thread() stops the background GC task, wakes foreground
GC_MERGE waiters, frees sbi->gc_thread, and then clears the published
pointer.  A foreground f2fs_balance_fs() caller can already have copied
that pointer and queued itself on gc_th->fggc_wq, so freeing gc_th at
stop time can leave finish_wait() operating on a freed waitqueue.

Keep the allocated GC-thread state until the superblock is destroyed and
use gc_th->f2fs_gc_task as the running-state marker.  The stop path now
withdraws the task pointer with xchg(), stops the task, and wakes any
foreground waiters, but leaves the waitqueue storage valid.  The start
path reuses a stopped gc_thread object instead of reinitializing its
waitqueues, and remount restart decisions check the task pointer rather
than only the object pointer.

f2fs_balance_fs() also snapshots sbi->gc_thread once and rechecks
f2fs_gc_task after prepare_to_wait().  If teardown wins the race after
the first check, the foreground caller removes its wait entry without
sleeping on a worker that has already been withdrawn.

Validation reproduced this kernel report:

BUG: KASAN: slab-use-after-free in finish_wait+0x276/0x290
Write of size 8 at addr ffff8881150819b8 by task dd/802
The buggy address belongs to the object at ffff888115081900 which
belongs to the cache kmalloc-256 of size 256
The buggy address is located 184 bytes inside of freed 256-byte region
Call trace:
  finish_wait()
  f2fs_balance_fs()
  f2fs_write_single_data_page()
  f2fs_write_cache_pages()
  __f2fs_write_data_pages()
  do_writepages()
  filemap_fdatawrite_wbc()
  __filemap_fdatawrite_range()
  file_write_and_wait_range()
  f2fs_do_sync_file()
  f2fs_sync_file()
  do_fsync()
Freed by task stack:
  kfree()
  f2fs_stop_gc_thread()
  f2fs_do_shutdown()
  f2fs_shutdown()
  fs_bdev_mark_dead()

Fixes: 5911d2d1d1a3 ("f2fs: introduce gc_merge mount option")
Signed-off-by: Zhang Cen <[email protected]>
---
v4:
- Replace the v3 SRCU/refcounted lifetime model with a smaller fix that
  keeps the existing heap-allocated gc_thread object alive until
  superblock teardown.
- Use f2fs_gc_task as the running-state marker and withdraw it with
  xchg() before waking GC_MERGE waiters.
- Reuse a stopped gc_thread object across remount restarts so the
  waitqueues are not reinitialized while old waiters can still finish.
- Recheck f2fs_gc_task after prepare_to_wait() so a waiter that races
  with teardown does not sleep after the worker has been withdrawn.

v3:
- Add the Fixes tag for the GC_MERGE foreground wait path.
- Fix checkpatch style issues in the broader lifetime variant.

v2:
- Sashiko.dev pointed out that GC_MERGE foreground waiters and
  GC-thread users needed lifetime-safe access after teardown.

 fs/f2fs/gc.c      | 48 +++++++++++++++++++++++++++++------------------
 fs/f2fs/segment.c | 19 ++++++++++++-------
 fs/f2fs/super.c   |  7 +++++--
 3 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba93010924c06..20f8394482a09 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,12 +193,23 @@ static int gc_thread_func(void *data)
 
 int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 {
-       struct f2fs_gc_kthread *gc_th;
+       struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+       struct task_struct *task;
+       bool allocated = false;
        dev_t dev = sbi->sb->s_bdev->bd_dev;
 
-       gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-       if (!gc_th)
-               return -ENOMEM;
+       if (gc_th && READ_ONCE(gc_th->f2fs_gc_task))
+               return 0;
+
+       if (!gc_th) {
+               gc_th = f2fs_kmalloc(sbi, sizeof(*gc_th), GFP_KERNEL);
+               if (!gc_th)
+                       return -ENOMEM;
+               init_waitqueue_head(&gc_th->gc_wait_queue_head);
+               init_waitqueue_head(&gc_th->fggc_wq);
+               sbi->gc_thread = gc_th;
+               allocated = true;
+       }
 
        gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
        gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
@@ -221,34 +232,35 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 
        gc_th->gc_wake = false;
 
-       sbi->gc_thread = gc_th;
-       init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-       init_waitqueue_head(&sbi->gc_thread->fggc_wq);
-       sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
-                       "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
-       if (IS_ERR(gc_th->f2fs_gc_task)) {
-               int err = PTR_ERR(gc_th->f2fs_gc_task);
+       task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u",
+                          MAJOR(dev), MINOR(dev));
+       if (IS_ERR(task)) {
+               int err = PTR_ERR(task);
 
-               kfree(gc_th);
-               sbi->gc_thread = NULL;
+               if (allocated) {
+                       kfree(gc_th);
+                       sbi->gc_thread = NULL;
+               }
                return err;
        }
 
-       set_user_nice(gc_th->f2fs_gc_task,
-                       PRIO_TO_NICE(sbi->critical_task_priority));
+       WRITE_ONCE(gc_th->f2fs_gc_task, task);
+       set_user_nice(task, PRIO_TO_NICE(sbi->critical_task_priority));
        return 0;
 }
 
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 {
        struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+       struct task_struct *task;
 
        if (!gc_th)
                return;
-       kthread_stop(gc_th->f2fs_gc_task);
+       task = xchg(&gc_th->f2fs_gc_task, NULL);
+       if (!task)
+               return;
+       kthread_stop(task);
        wake_up_all(&gc_th->fggc_wq);
-       kfree(gc_th);
-       sbi->gc_thread = NULL;
 }
 
 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 788f8b0502492..84307525edd27 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,6 +424,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
  */
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 {
+       struct f2fs_gc_kthread *gc_th;
+
        if (f2fs_cp_error(sbi))
                return;
 
@@ -444,15 +446,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        if (has_enough_free_secs(sbi, 0, 0))
                return;
 
-       if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
-                               sbi->gc_thread->f2fs_gc_task) {
+       gc_th = sbi->gc_thread;
+       if (test_opt(sbi, GC_MERGE) && gc_th &&
+           READ_ONCE(gc_th->f2fs_gc_task)) {
                DEFINE_WAIT(wait);
 
-               prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-               wake_up(&sbi->gc_thread->gc_wait_queue_head);
-               io_schedule();
-               finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+               prepare_to_wait(&gc_th->fggc_wq, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               if (READ_ONCE(gc_th->f2fs_gc_task)) {
+                       wake_up(&gc_th->gc_wait_queue_head);
+                       io_schedule();
+               }
+               finish_wait(&gc_th->fggc_wq, &wait);
        } else {
                struct f2fs_gc_control gc_control = {
                        .victim_segno = NULL_SEGNO,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ccf806b676f53..d6863da05a7c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2925,11 +2925,12 @@ static int __f2fs_remount(struct fs_context *fc, struct 
super_block *sb)
        if ((flags & SB_RDONLY) ||
                        (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
                        !test_opt(sbi, GC_MERGE))) {
-               if (sbi->gc_thread) {
+               if (sbi->gc_thread && READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
                        f2fs_stop_gc_thread(sbi);
                        need_restart_gc = true;
                }
-       } else if (!sbi->gc_thread) {
+       } else if (!sbi->gc_thread ||
+                       !READ_ONCE(sbi->gc_thread->f2fs_gc_task)) {
                err = f2fs_start_gc_thread(sbi);
                if (err)
                        goto restore_opts;
@@ -5451,6 +5452,7 @@ static int f2fs_fill_super(struct super_block *sb, struct 
fs_context *fc)
 free_sb_buf:
        kfree(raw_super);
 free_sbi:
+       kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
@@ -5535,6 +5537,7 @@ static void kill_f2fs_super(struct super_block *sb)
        /* Release block devices last, after fscrypt_destroy_keyring(). */
        if (sbi) {
                destroy_device_list(sbi);
+               kfree(sbi->gc_thread);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
                lockdep_unregister_key(&sbi->cp_global_sem_key);
 #endif
-- 
2.43.0


_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to