copygc tries to time its wakeups to balance letting work accumulate
against running before we're out of free space - but for a variety of
reasons (multiple devices, io clock slop, the vagaries of
fragmentation) this isn't completely reliable.

So to avoid getting stuck, add direct wakeups from the allocator to the
copygc thread when we notice we're running low on free buckets.

Reported-by: [email protected]
Signed-off-by: Kent Overstreet <[email protected]>
---
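A note on the io clock change in movinggc.c below:
bch2_kthread_io_clock_wait(..., MAX_SCHEDULE_TIMEOUT) goes back to
sleep until the io clock actually reaches its target, so a direct
wake_up_process() from the allocator would just be swallowed;
bch2_io_clock_schedule_timeout() sleeps once, so the new wakeup gets
copygc back to the top of its main loop, where the wait is recomputed.
Roughly this shape (a sketch only - compute_wait() and do_work() are
stand-ins, not functions in this patch):

#include <linux/kthread.h>
#include <linux/sched.h>

long compute_wait(void);	/* e.g. max(0, wait_amount - max_slop) */
void do_work(void);

static int throttle_thread(void *arg)
{
	while (!kthread_should_stop()) {
		long wait = compute_wait();

		if (wait > 0) {
			/*
			 * Single bounded sleep: a direct wake_up_process()
			 * ends it early and we recompute the wait on the
			 * next iteration, instead of being put straight
			 * back to sleep by a wait-until-target loop.
			 */
			schedule_timeout_interruptible(wait);
			continue;
		}

		do_work();
	}
	return 0;
}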
 fs/bcachefs/alloc_foreground.c |  8 ++++++++
 fs/bcachefs/bcachefs.h         |  2 +-
 fs/bcachefs/movinggc.c         | 22 +++++++++++-----------
 3 files changed, 20 insertions(+), 12 deletions(-)
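
For reference, the thread lifecycle pattern the bcachefs.h and
movinggc.c hunks implement: publish the task_struct with
rcu_assign_pointer() so lockless wakers can rcu_dereference() it, and
take a ref with get_task_struct() for the stop path. This is safe
because a task_struct's final reference is dropped via call_rcu after
the task exits, so the pointer can't go stale under rcu_read_lock().
A minimal sketch with made-up names (worker, worker_wake(), etc. -
none of these are in the patch):

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

static struct task_struct __rcu *worker;

/* lockless waker - e.g. called from an allocation slowpath */
static void worker_wake(void)
{
	rcu_read_lock();
	struct task_struct *t = rcu_dereference(worker);
	if (t)
		wake_up_process(t);
	rcu_read_unlock();
}

static int worker_start(int (*fn)(void *), void *arg)
{
	struct task_struct *t = kthread_create(fn, arg, "worker");
	if (IS_ERR(t))
		return PTR_ERR(t);

	get_task_struct(t);		/* ref for the stop path */
	rcu_assign_pointer(worker, t);	/* publish only when fully set up */
	wake_up_process(t);
	return 0;
}

static void worker_stop(void)
{
	/* teardown is single threaded, hence the _protected variant */
	struct task_struct *t = rcu_dereference_protected(worker, true);

	RCU_INIT_POINTER(worker, NULL);
	if (t) {
		kthread_stop(t);
		put_task_struct(t);
	}
}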

diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5836870ab882..c7848672796d 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -822,6 +822,14 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                }
        }
 
+       if (bch2_err_matches(ret, BCH_ERR_freelist_empty)) {
+               rcu_read_lock();
+               struct task_struct *t = rcu_dereference(c->copygc_thread);
+               if (t)
+                       wake_up_process(t);
+               rcu_read_unlock();
+       }
+
        return ret;
 }
 
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f4151ee51b03..7cc81fbc4c3a 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -986,7 +986,7 @@ struct bch_fs {
        struct bch_fs_rebalance rebalance;
 
        /* COPYGC */
-       struct task_struct      *copygc_thread;
+       struct task_struct __rcu *copygc_thread;
        struct write_point      copygc_write_point;
        s64                     copygc_wait_at;
        s64                     copygc_wait;
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index d658be90f737..80b18b4b04b7 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -363,19 +363,18 @@ static int bch2_copygc_thread(void *arg)
                }
 
                last = atomic64_read(&clock->now);
-               wait = bch2_copygc_wait_amount(c);
+               wait = max_t(long, 0, bch2_copygc_wait_amount(c) - clock->max_slop);
 
-               if (wait > clock->max_slop) {
+               if (wait > 0) {
                        c->copygc_wait_at = last;
                        c->copygc_wait = last + wait;
                        move_buckets_wait(&ctxt, buckets, true);
-                       trace_and_count(c, copygc_wait, c, wait, last + wait);
-                       bch2_kthread_io_clock_wait(clock, last + wait,
-                                       MAX_SCHEDULE_TIMEOUT);
+                       trace_and_count(c, copygc_wait, c, wait, c->copygc_wait);
+                       bch2_io_clock_schedule_timeout(clock, c->copygc_wait);
                        continue;
                }
 
-               c->copygc_wait = 0;
+               c->copygc_wait = c->copygc_wait_at = 0;
 
                c->copygc_running = true;
                ret = bch2_copygc(&ctxt, buckets, &did_work);
@@ -407,9 +406,10 @@ static int bch2_copygc_thread(void *arg)
 
 void bch2_copygc_stop(struct bch_fs *c)
 {
-       if (c->copygc_thread) {
-               kthread_stop(c->copygc_thread);
-               put_task_struct(c->copygc_thread);
+       struct task_struct *t = rcu_dereference_protected(c->copygc_thread, true);
+       if (t) {
+               kthread_stop(t);
+               put_task_struct(t);
        }
-       c->copygc_thread = NULL;
+       rcu_assign_pointer(c->copygc_thread, NULL);
 }
@@ -436,8 +436,8 @@ int bch2_copygc_start(struct bch_fs *c)
 
        get_task_struct(t);
 
-       c->copygc_thread = t;
-       wake_up_process(c->copygc_thread);
+       rcu_assign_pointer(c->copygc_thread, t);
+       wake_up_process(t);
 
        return 0;
 }
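
One behavioral note on the wait computation that's easy to miss in the
diff: the old code slept for the full wait amount once it exceeded
clock->max_slop, while the new code subtracts the slop up front, so
copygc now wakes up to max_slop earlier (and, as before, runs
immediately when the amount is within the slop). A throwaway userspace
comparison, with made-up numbers:

#include <stdio.h>

static long old_wait(long amount, long max_slop)
{
	/* old: only sleep if past the slop, but then sleep the full amount */
	return amount > max_slop ? amount : 0;
}

static long new_wait(long amount, long max_slop)
{
	/* new: fold the slop into the wait and clamp at zero */
	long wait = amount - max_slop;
	return wait > 0 ? wait : 0;
}

int main(void)
{
	long max_slop = 100;
	long amounts[] = { 0, 50, 100, 150, 1000 };

	for (size_t i = 0; i < sizeof(amounts) / sizeof(amounts[0]); i++)
		printf("amount=%4ld old=%4ld new=%4ld\n",
		       amounts[i],
		       old_wait(amounts[i], max_slop),
		       new_wait(amounts[i], max_slop));
	return 0;
}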
-- 
2.45.2