From: Yu Kuai <yuku...@huawei.com>

submit_flushes
 atomic_set(&mddev->flush_pending, 1);
 rdev_for_each_rcu(rdev, mddev)
  atomic_inc(&mddev->flush_pending);
  bi->bi_end_io = md_end_flush
  submit_bio(bi);
                        /* flush io is done first */
                        md_end_flush
                         if (atomic_dec_and_test(&mddev->flush_pending))
                          percpu_ref_put(&mddev->active_io)
                          -> active_io is not released

 if (atomic_dec_and_test(&mddev->flush_pending))
  -> missing release of active_io

For consequence, mddev_suspend() will wait for 'active_io' to be zero
forever.

Fix this problem by releasing 'active_io' in submit_flushes() if
'flush_pending' is decreased to zero.

Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration")
Reported-by: Blazej Kucman <blazej.kuc...@linux.intel.com>
Closes: https://lore.kernel.org/lkml/20240130172524.00004...@linux.intel.com/
Signed-off-by: Yu Kuai <yuku...@huawei.com>
---
 drivers/md/md.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 85fde05c37dd..9e41a9aaba8b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws)
                        rcu_read_lock();
                }
        rcu_read_unlock();
-       if (atomic_dec_and_test(&mddev->flush_pending))
+       if (atomic_dec_and_test(&mddev->flush_pending)) {
+               /* The pair is percpu_ref_get() from md_flush_request() */
+               percpu_ref_put(&mddev->active_io);
+
                queue_work(md_wq, &mddev->flush_work);
+       }
 }
 
 static void md_submit_flush_data(struct work_struct *ws)
-- 
2.39.2


Reply via email to