From: Chen Cheng <[email protected]>

Saving the memalloc_noio token in the mddev-scoped field mddev->noio_flag
causes PF_MEMALLOC_NOIO to leak in task A, while task B restores a token
that it never saved.

scenario:

task A                          mddev                         task B
======                          =======                       ============
write suspend_lo
mddev_suspend()
                                suspended == 0
                                drain active_io
                                suspended = 1
A: noio_A = memalloc_noio_save()
A returns with PF_MEMALLOC_NOIO set

                                                              write suspend_hi
                                                              mddev_suspend()
                                suspended == 1
                                suspended = 2
                                                              B returns
                                                              (no save)

mddev_resume()
                                suspended = 1
                                not last resume
A returns
A still has PF_MEMALLOC_NOIO   <-- leaked

                                                              mddev_resume()
                                suspended = 0
                                                              
                                                              memalloc_noio_restore(noio_A)
                                                              (restores A's token in B)

Fixed by:
  - return each caller's noio_flags from mddev_suspend()
  - pass that token back into mddev_resume()
  - update the suspend-and-lock helpers to carry the token
  - store the token in struct raid_set for dm-raid paths where suspend
    and resume are paired across callbacks

Validation:
The reproducer repeatedly updates the array's suspend_lo and suspend_hi
sysfs attributes from many concurrent userspace workers, so that multiple
tasks call mddev_suspend()/mddev_resume() concurrently.

Each worker:
  - reads its initial /proc/self/stat flags and verifies that
    PF_MEMALLOC_NOIO is not already set
  - writes 0 to either suspend_lo or suspend_hi
  - immediately reads its own task flags again
  - reports a successful reproduction (i.e. the leak) if
    flags & PF_MEMALLOC_NOIO is still set after the write returns

Link: https://github.com/chencheng-fnnas/reproducer/blob/main/repro-md-noio-token-leak.sh

Fixes: 78f57ef9d50a ("md: use memalloc scope APIs in mddev_suspend()/mddev_resume()")

Signed-off-by: Chen Cheng <[email protected]>
---
 drivers/md/dm-raid.c       |  7 ++--
 drivers/md/md-autodetect.c |  5 ++-
 drivers/md/md-bitmap.c     | 12 +++---
 drivers/md/md.c            | 85 ++++++++++++++++++++++----------------
 drivers/md/md.h            | 23 ++++++-----
 drivers/md/raid5-cache.c   | 11 +++--
 drivers/md/raid5.c         | 25 ++++++-----
 7 files changed, 97 insertions(+), 71 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 8f5a5e1342a9..d89207e3722a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -239,10 +239,11 @@ struct raid_set {
        int raid_disks;
        int delta_disks;
        int data_offset;
        int raid10_copies;
        int requested_bitmap_chunk_sectors;
+       unsigned int suspend_noio_flags;
 
        struct mddev md;
        struct raid_type *raid_type;
 
        sector_t array_sectors;
@@ -3251,11 +3252,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int 
argc, char **argv)
        /* Start raid set read-only and assumed clean to change in 
raid_resume() */
        rs->md.ro = MD_RDONLY;
        rs->md.in_sync = 1;
 
        /* Has to be held on running the array */
-       mddev_suspend_and_lock_nointr(&rs->md);
+       mddev_suspend_and_lock_nointr(&rs->md, &rs->suspend_noio_flags);
 
        /* Keep array frozen until resume. */
        md_frozen_sync_thread(&rs->md);
 
        r = md_run(&rs->md);
@@ -3863,11 +3864,11 @@ static void raid_postsuspend(struct dm_target *ti)
                /*
                 * sync_thread must be stopped during suspend, and writes have
                 * to be stopped before suspending to avoid deadlocks.
                 */
                md_stop_writes(&rs->md);
-               mddev_suspend(&rs->md, false);
+               mddev_suspend(&rs->md, false, &rs->suspend_noio_flags);
                rs->md.ro = MD_RDONLY;
        }
        clear_bit(MD_DM_SUSPENDING, &mddev->flags);
 
 }
@@ -4141,11 +4142,11 @@ static void raid_resume(struct dm_target *ti)
                                                       
lockdep_is_held(&mddev->reconfig_mutex)));
                clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
                mddev->ro = MD_RDWR;
                mddev->in_sync = 0;
                md_unfrozen_sync_thread(mddev);
-               mddev_unlock_and_resume(mddev);
+               mddev_unlock_and_resume(mddev, rs->suspend_noio_flags);
        }
 }
 
 static struct target_type raid_target = {
        .name = "raid",
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 4b80165afd23..58e062cd0580 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -126,10 +126,11 @@ static void __init md_setup_drive(struct md_setup_args 
*args)
        dev_t devices[MD_SB_DISKS + 1], mdev;
        struct mdu_array_info_s ainfo = { };
        struct mddev *mddev;
        int err = 0, i;
        char name[16];
+       unsigned int noio_flags;
 
        if (args->partitioned) {
                mdev = MKDEV(mdp_major, args->minor << MdpMinorShift);
                sprintf(name, "md_d%d", args->minor);
        } else {
@@ -173,11 +174,11 @@ static void __init md_setup_drive(struct md_setup_args 
*args)
        if (IS_ERR(mddev)) {
                pr_err("md: md_alloc failed - cannot start array %s\n", name);
                return;
        }
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err) {
                pr_err("md: failed to lock array %s\n", name);
                goto out_mddev_put;
        }
 
@@ -219,11 +220,11 @@ static void __init md_setup_drive(struct md_setup_args 
*args)
        if (!err)
                err = do_md_run(mddev);
        if (err)
                pr_warn("md: starting %s failed\n", name);
 out_unlock:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
 out_mddev_put:
        mddev_put(mddev);
 }
 
 static int __init raid_setup(char *str)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 028b9ca8ce52..74b7f569a3f4 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2620,13 +2620,14 @@ location_show(struct mddev *mddev, char *page)
 }
 
 static ssize_t
 location_store(struct mddev *mddev, const char *buf, size_t len)
 {
+       unsigned int noio_flags;
        int rv;
 
-       rv = mddev_suspend_and_lock(mddev);
+       rv = mddev_suspend_and_lock(mddev, &noio_flags);
        if (rv)
                return rv;
 
        if (mddev->pers) {
                if (mddev->recovery || mddev->sync_thread) {
@@ -2711,11 +2712,11 @@ location_store(struct mddev *mddev, const char *buf, 
size_t len)
                set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
                md_wakeup_thread(mddev->thread);
        }
        rv = 0;
 out:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        if (rv)
                return rv;
        return len;
 
 merge_err:
@@ -2831,17 +2832,18 @@ backlog_store(struct mddev *mddev, const char *buf, 
size_t len)
 {
        unsigned long backlog;
        unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
        struct md_rdev *rdev;
        bool has_write_mostly = false;
+       unsigned int noio_flags;
        int rv = kstrtoul(buf, 10, &backlog);
        if (rv)
                return rv;
        if (backlog > COUNTER_MAX)
                return -EINVAL;
 
-       rv = mddev_suspend_and_lock(mddev);
+       rv = mddev_suspend_and_lock(mddev, &noio_flags);
        if (rv)
                return rv;
 
        /*
         * Without write mostly device, it doesn't make sense to set
@@ -2854,11 +2856,11 @@ backlog_store(struct mddev *mddev, const char *buf, 
size_t len)
                }
        }
        if (!has_write_mostly) {
                pr_warn_ratelimited("%s: can't set backlog, no write mostly 
device available\n",
                                    mdname(mddev));
-               mddev_unlock(mddev);
+               mddev_unlock_and_resume(mddev, noio_flags);
                return -EINVAL;
        }
 
        mddev->bitmap_info.max_write_behind = backlog;
        if (!backlog && mddev->serial_info_pool) {
@@ -2871,11 +2873,11 @@ backlog_store(struct mddev *mddev, const char *buf, 
size_t len)
                        mddev_create_serial_pool(mddev, rdev);
        }
        if (old_mwb != backlog)
                bitmap_update_sb(mddev->bitmap);
 
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return len;
 }
 
 static struct md_sysfs_entry bitmap_backlog =
 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1377c407614c..86d938dee50a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -459,11 +459,12 @@ static void md_submit_bio(struct bio *bio)
 
 /*
  * Make sure no new requests are submitted to the device, and any requests that
  * have been submitted are completely handled.
  */
-int mddev_suspend(struct mddev *mddev, bool interruptible)
+int mddev_suspend(struct mddev *mddev, bool interruptible,
+                 unsigned int *noio_flags)
 {
        int err = 0;
 
        /*
         * hold reconfig_mutex to wait for normal io will deadlock, because
@@ -478,10 +479,11 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
                mutex_lock(&mddev->suspend_mutex);
        if (err)
                return err;
 
        if (mddev->suspended) {
+               *noio_flags = memalloc_noio_save();
                WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
                mutex_unlock(&mddev->suspend_mutex);
                return 0;
        }
 
@@ -515,31 +517,30 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
         * prevent deadlock.
         */
        WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
 
        /* restrict memory reclaim I/O during raid array is suspend */
-       mddev->noio_flag = memalloc_noio_save();
+       *noio_flags = memalloc_noio_save();
 
        mutex_unlock(&mddev->suspend_mutex);
        return 0;
 }
 EXPORT_SYMBOL_GPL(mddev_suspend);
 
-static void __mddev_resume(struct mddev *mddev, bool recovery_needed)
+static void __mddev_resume(struct mddev *mddev, bool recovery_needed,
+                          unsigned int noio_flags)
 {
        lockdep_assert_not_held(&mddev->reconfig_mutex);
 
        mutex_lock(&mddev->suspend_mutex);
+       memalloc_noio_restore(noio_flags);
        WRITE_ONCE(mddev->suspended, mddev->suspended - 1);
        if (mddev->suspended) {
                mutex_unlock(&mddev->suspend_mutex);
                return;
        }
 
-       /* entred the memalloc scope from mddev_suspend() */
-       memalloc_noio_restore(mddev->noio_flag);
-
        percpu_ref_resurrect(&mddev->active_io);
        wake_up(&mddev->sb_wait);
 
        if (recovery_needed)
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -547,13 +548,13 @@ static void __mddev_resume(struct mddev *mddev, bool 
recovery_needed)
        md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
        mutex_unlock(&mddev->suspend_mutex);
 }
 
-void mddev_resume(struct mddev *mddev)
+void mddev_resume(struct mddev *mddev, unsigned int noio_flags)
 {
-       return __mddev_resume(mddev, true);
+       return __mddev_resume(mddev, true, noio_flags);
 }
 EXPORT_SYMBOL_GPL(mddev_resume);
 
 /* sync bdev before setting device to readonly or stopping raid*/
 static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int 
opener_num)
@@ -3737,10 +3738,11 @@ rdev_attr_store(struct kobject *kobj, struct attribute 
*attr,
 {
        struct rdev_sysfs_entry *entry = container_of(attr, struct 
rdev_sysfs_entry, attr);
        struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
        struct kernfs_node *kn = NULL;
        bool suspend = false;
+       unsigned int noio_flags = 0;
        ssize_t rv;
        struct mddev *mddev = READ_ONCE(rdev->mddev);
 
        if (!entry->store)
                return -EIO;
@@ -3756,17 +3758,17 @@ rdev_attr_store(struct kobject *kobj, struct attribute 
*attr,
                    cmd_match(page, "writemostly") ||
                    cmd_match(page, "-writemostly"))
                        suspend = true;
        }
 
-       rv = suspend ? mddev_suspend_and_lock(mddev) : mddev_lock(mddev);
+       rv = suspend ? mddev_suspend_and_lock(mddev, &noio_flags) : 
mddev_lock(mddev);
        if (!rv) {
                if (rdev->mddev == NULL)
                        rv = -ENODEV;
                else
                        rv = entry->store(rdev, page, length);
-               suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
+               suspend ? mddev_unlock_and_resume(mddev, noio_flags) : 
mddev_unlock(mddev);
        }
 
        if (kn)
                sysfs_unbreak_active_protection(kn);
 
@@ -4049,15 +4051,16 @@ level_store(struct mddev *mddev, const char *buf, 
size_t len)
        size_t slen = len;
        struct md_personality *pers, *oldpers;
        long level;
        void *priv, *oldpriv;
        struct md_rdev *rdev;
+       unsigned int noio_flags;
 
        if (slen == 0 || slen >= sizeof(clevel))
                return -EINVAL;
 
-       rv = mddev_suspend_and_lock(mddev);
+       rv = mddev_suspend_and_lock(mddev, &noio_flags);
        if (rv)
                return rv;
 
        if (mddev->pers == NULL) {
                memcpy(mddev->clevel, buf, slen);
@@ -4231,11 +4234,11 @@ level_store(struct mddev *mddev, const char *buf, 
size_t len)
                md_update_sb(mddev, 1);
        sysfs_notify_dirent_safe(mddev->sysfs_level);
        md_new_event();
        rv = len;
 out_unlock:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return rv;
 }
 
 static struct md_sysfs_entry md_level =
 __ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
@@ -4410,17 +4413,18 @@ static int update_raid_disks(struct mddev *mddev, int 
raid_disks);
 
 static ssize_t
 raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
 {
        unsigned int n;
+       unsigned int noio_flags;
        int err;
 
        err = kstrtouint(buf, 10, &n);
        if (err < 0)
                return err;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        if (mddev->pers) {
                if (n != mddev->raid_disks)
                        err = update_raid_disks(mddev, n);
@@ -4442,11 +4446,11 @@ raid_disks_store(struct mddev *mddev, const char *buf, 
size_t len)
                mddev->raid_disks = n;
                mddev->reshape_backwards = (mddev->delta_disks < 0);
        } else
                mddev->raid_disks = n;
 out_unlock:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return err ? err : len;
 }
 static struct md_sysfs_entry md_raid_disks =
 __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
 
@@ -4822,10 +4826,11 @@ new_dev_store(struct mddev *mddev, const char *buf, 
size_t len)
        char *e;
        int major = simple_strtoul(buf, &e, 10);
        int minor;
        dev_t dev;
        struct md_rdev *rdev;
+       unsigned int noio_flags;
        int err;
 
        if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
                return -EINVAL;
        minor = simple_strtoul(e+1, &e, 10);
@@ -4834,11 +4839,11 @@ new_dev_store(struct mddev *mddev, const char *buf, 
size_t len)
        dev = MKDEV(major, minor);
        if (major != MAJOR(dev) ||
            minor != MINOR(dev))
                return -EOVERFLOW;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        if (mddev->persistent) {
                rdev = md_import_device(dev, mddev->major_version,
                                        mddev->minor_version);
@@ -4855,18 +4860,18 @@ new_dev_store(struct mddev *mddev, const char *buf, 
size_t len)
                rdev = md_import_device(dev, -2, -1);
        else
                rdev = md_import_device(dev, -1, -1);
 
        if (IS_ERR(rdev)) {
-               mddev_unlock_and_resume(mddev);
+               mddev_unlock_and_resume(mddev, noio_flags);
                return PTR_ERR(rdev);
        }
        err = bind_rdev_to_array(rdev, mddev);
  out:
        if (err)
                export_rdev(rdev);
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        if (!err)
                md_new_event();
        return err ? err : len;
 }
 
@@ -5257,28 +5262,29 @@ static int mddev_start_reshape(struct mddev *mddev)
 static ssize_t
 action_store(struct mddev *mddev, const char *page, size_t len)
 {
        int ret;
        enum sync_action action;
+       unsigned int noio_flags = 0;
 
        if (!mddev->pers || !mddev->pers->sync_request)
                return -EINVAL;
 
        action = md_sync_action_by_name(page);
 retry:
        if (work_busy(&mddev->sync_work))
                flush_work(&mddev->sync_work);
 
        ret = (action == ACTION_RESHAPE) ?
-               mddev_suspend_and_lock(mddev) :
+               mddev_suspend_and_lock(mddev, &noio_flags) :
                mddev_lock(mddev);
        if (ret)
                return ret;
 
        if (work_busy(&mddev->sync_work)) {
                if (action == ACTION_RESHAPE)
-                       mddev_unlock_and_resume(mddev);
+                       mddev_unlock_and_resume(mddev, noio_flags);
                else
                        mddev_unlock(mddev);
                goto retry;
        }
 
@@ -5349,11 +5355,11 @@ action_store(struct mddev *mddev, const char *page, 
size_t len)
        sysfs_notify_dirent_safe(mddev->sysfs_action);
        ret = len;
 
 out:
        if (action == ACTION_RESHAPE)
-               mddev_unlock_and_resume(mddev);
+               mddev_unlock_and_resume(mddev, noio_flags);
        else
                mddev_unlock(mddev);
        return ret;
 }
 
@@ -5640,24 +5646,25 @@ suspend_lo_show(struct mddev *mddev, char *page)
 
 static ssize_t
 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 {
        unsigned long long new;
+       unsigned int noio_flags;
        int err;
 
        err = kstrtoull(buf, 10, &new);
        if (err < 0)
                return err;
        if (new != (sector_t)new)
                return -EINVAL;
 
-       err = mddev_suspend(mddev, true);
+       err = mddev_suspend(mddev, true, &noio_flags);
        if (err)
                return err;
 
        WRITE_ONCE(mddev->suspend_lo, new);
-       mddev_resume(mddev);
+       mddev_resume(mddev, noio_flags);
 
        return len;
 }
 static struct md_sysfs_entry md_suspend_lo =
 __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -5671,24 +5678,25 @@ suspend_hi_show(struct mddev *mddev, char *page)
 
 static ssize_t
 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 {
        unsigned long long new;
+       unsigned int noio_flags;
        int err;
 
        err = kstrtoull(buf, 10, &new);
        if (err < 0)
                return err;
        if (new != (sector_t)new)
                return -EINVAL;
 
-       err = mddev_suspend(mddev, true);
+       err = mddev_suspend(mddev, true, &noio_flags);
        if (err)
                return err;
 
        WRITE_ONCE(mddev->suspend_hi, new);
-       mddev_resume(mddev);
+       mddev_resume(mddev, noio_flags);
 
        return len;
 }
 static struct md_sysfs_entry md_suspend_hi =
 __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -5928,19 +5936,20 @@ static ssize_t serialize_policy_show(struct mddev 
*mddev, char *page)
 static ssize_t
 serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
 {
        int err;
        bool value;
+       unsigned int noio_flags;
 
        err = kstrtobool(buf, &value);
        if (err)
                return err;
 
        if (value == test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
                return len;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) {
                pr_err("md: serialize_policy is only effective for raid1\n");
                err = -EINVAL;
@@ -5953,11 +5962,11 @@ serialize_policy_store(struct mddev *mddev, const char 
*buf, size_t len)
        } else {
                mddev_destroy_serial_pool(mddev, NULL);
                clear_bit(MD_SERIALIZE_POLICY, &mddev->flags);
        }
 unlock:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return err ?: len;
 }
 
 static struct md_sysfs_entry md_serialize_policy =
 __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
@@ -6263,21 +6272,22 @@ EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
 
 /* update the optimal I/O size after a reshape */
 void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes)
 {
        struct queue_limits lim;
+       unsigned int noio_flags;
 
        if (mddev_is_dm(mddev))
                return;
 
        /* don't bother updating io_opt if we can't suspend the array */
-       if (mddev_suspend(mddev, false) < 0)
+       if (mddev_suspend(mddev, false, &noio_flags) < 0)
                return;
        lim = queue_limits_start_update(mddev->gendisk->queue);
        lim.io_opt = lim.io_min * nr_stripes;
        queue_limits_commit_update(mddev->gendisk->queue, &lim);
-       mddev_resume(mddev);
+       mddev_resume(mddev, noio_flags);
 }
 EXPORT_SYMBOL_GPL(mddev_update_io_opt);
 
 static void mddev_delayed_delete(struct work_struct *ws)
 {
@@ -7255,10 +7265,11 @@ static void autorun_array(struct mddev *mddev)
  */
 static void autorun_devices(int part)
 {
        struct md_rdev *rdev0, *rdev, *tmp;
        struct mddev *mddev;
+       unsigned int noio_flags;
 
        pr_info("md: autorun ...\n");
        while (!list_empty(&pending_raid_disks)) {
                int unit;
                dev_t dev;
@@ -7295,27 +7306,27 @@ static void autorun_devices(int part)
 
                mddev = md_alloc(dev, NULL);
                if (IS_ERR(mddev))
                        break;
 
-               if (mddev_suspend_and_lock(mddev))
+               if (mddev_suspend_and_lock(mddev, &noio_flags))
                        pr_warn("md: %s locked, cannot run\n", mdname(mddev));
                else if (mddev->raid_disks || mddev->major_version
                         || !list_empty(&mddev->disks)) {
                        pr_warn("md: %s already running, cannot run %pg\n",
                                mdname(mddev), rdev0->bdev);
-                       mddev_unlock_and_resume(mddev);
+                       mddev_unlock_and_resume(mddev, noio_flags);
                } else {
                        pr_debug("md: created %s\n", mdname(mddev));
                        mddev->persistent = 1;
                        rdev_for_each_list(rdev, tmp, &candidates) {
                                list_del_init(&rdev->same_set);
                                if (bind_rdev_to_array(rdev, mddev))
                                        export_rdev(rdev);
                        }
                        autorun_array(mddev);
-                       mddev_unlock_and_resume(mddev);
+                       mddev_unlock_and_resume(mddev, noio_flags);
                }
                /* on success, candidates will be empty, on error
                 * it won't...
                 */
                rdev_for_each_list(rdev, tmp, &candidates) {
@@ -8329,10 +8340,11 @@ static int __md_set_array_info(struct mddev *mddev, 
void __user *argp)
 
 static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
                        unsigned int cmd, unsigned long arg)
 {
        int err = 0;
+       unsigned int noio_flags = 0;
        void __user *argp = (void __user *)arg;
        struct mddev *mddev = NULL;
 
        err = md_ioctl_valid(cmd);
        if (err)
@@ -8380,11 +8392,11 @@ static int md_ioctl(struct block_device *bdev, 
blk_mode_t mode,
        }
 
        if (!md_is_rdwr(mddev))
                flush_work(&mddev->sync_work);
 
-       err = md_ioctl_need_suspend(cmd) ? mddev_suspend_and_lock(mddev) :
+       err = md_ioctl_need_suspend(cmd) ? mddev_suspend_and_lock(mddev, 
&noio_flags) :
                                           mddev_lock(mddev);
        if (err) {
                pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
                         err, cmd);
                goto out;
@@ -8511,11 +8523,11 @@ static int md_ioctl(struct block_device *bdev, 
blk_mode_t mode,
 unlock:
        if (mddev->hold_active == UNTIL_IOCTL &&
            err != -EINVAL)
                mddev->hold_active = 0;
 
-       md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
+       md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev, noio_flags) 
:
                                     mddev_unlock(mddev);
 
 out:
        if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY))
                clear_bit(MD_CLOSING, &mddev->flags);
@@ -10180,20 +10192,21 @@ static bool md_choose_sync_action(struct mddev 
*mddev, int *spares)
 static void md_start_sync(struct work_struct *ws)
 {
        struct mddev *mddev = container_of(ws, struct mddev, sync_work);
        int spares = 0;
        bool suspend = false;
+       unsigned int noio_flags = 0;
        char *name;
 
        /*
         * If reshape is still in progress, spares won't be added or removed
         * from conf until reshape is done.
         */
        if (mddev->reshape_position == MaxSector &&
            md_spares_need_change(mddev)) {
                suspend = true;
-               mddev_suspend(mddev, false);
+               mddev_suspend(mddev, false, &noio_flags);
        }
 
        mddev_lock_nointr(mddev);
        if (!md_is_rdwr(mddev)) {
                /*
@@ -10237,11 +10250,11 @@ static void md_start_sync(struct work_struct *ws)
         * not set it again. Otherwise, we may cause issue like this one:
         *     https://bugzilla.kernel.org/show_bug.cgi?id=218200
         * Therefore, use __mddev_resume(mddev, false).
         */
        if (suspend)
-               __mddev_resume(mddev, false);
+               __mddev_resume(mddev, false, noio_flags);
        md_wakeup_thread(mddev->sync_thread);
        sysfs_notify_dirent_safe(mddev->sysfs_action);
        md_new_event();
        return;
 
@@ -10257,11 +10270,11 @@ static void md_start_sync(struct work_struct *ws)
         * not set it again. Otherwise, we may cause issue like this one:
         *     https://bugzilla.kernel.org/show_bug.cgi?id=218200
         * Therefore, use __mddev_resume(mddev, false).
         */
        if (suspend)
-               __mddev_resume(mddev, false);
+               __mddev_resume(mddev, false, noio_flags);
 
        wake_up(&resync_wait);
        if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
            mddev->sysfs_action)
                sysfs_notify_dirent_safe(mddev->sysfs_action);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d8daf0f75cbb..3337cd21eb30 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -619,11 +619,10 @@ struct mddev {
        mempool_t *serial_info_pool;
        void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
        struct md_cluster_info          *cluster_info;
        struct md_cluster_operations *cluster_ops;
        unsigned int                    good_device_nr; /* good device num 
within cluster raid */
-       unsigned int                    noio_flag; /* for memalloc scope API */
 
        /*
         * Temporarily store rdev that will be finally removed when
         * reconfig_mutex is unlocked, protected by reconfig_mutex.
         */
@@ -953,12 +952,13 @@ extern void md_stop(struct mddev *mddev);
 extern void md_stop_writes(struct mddev *mddev);
 extern int md_rdev_init(struct md_rdev *rdev);
 extern void md_rdev_clear(struct md_rdev *rdev);
 
 extern bool md_handle_request(struct mddev *mddev, struct bio *bio);
-extern int mddev_suspend(struct mddev *mddev, bool interruptible);
-extern void mddev_resume(struct mddev *mddev);
+extern int mddev_suspend(struct mddev *mddev, bool interruptible,
+                        unsigned int *noio_flags);
+extern void mddev_resume(struct mddev *mddev, unsigned int noio_flags);
 extern void md_idle_sync_thread(struct mddev *mddev);
 extern void md_frozen_sync_thread(struct mddev *mddev);
 extern void md_unfrozen_sync_thread(struct mddev *mddev);
 
 extern void md_update_sb(struct mddev *mddev, int force);
@@ -999,35 +999,38 @@ static inline void mddev_check_write_zeroes(struct mddev 
*mddev, struct bio *bio
        if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
            !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
                mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0;
 }
 
-static inline int mddev_suspend_and_lock(struct mddev *mddev)
+static inline int mddev_suspend_and_lock(struct mddev *mddev,
+                                        unsigned int *noio_flags)
 {
        int ret;
 
-       ret = mddev_suspend(mddev, true);
+       ret = mddev_suspend(mddev, true, noio_flags);
        if (ret)
                return ret;
 
        ret = mddev_lock(mddev);
        if (ret)
-               mddev_resume(mddev);
+               mddev_resume(mddev, *noio_flags);
 
        return ret;
 }
 
-static inline void mddev_suspend_and_lock_nointr(struct mddev *mddev)
+static inline void mddev_suspend_and_lock_nointr(struct mddev *mddev,
+                                                unsigned int *noio_flags)
 {
-       mddev_suspend(mddev, false);
+       mddev_suspend(mddev, false, noio_flags);
        mddev_lock_nointr(mddev);
 }
 
-static inline void mddev_unlock_and_resume(struct mddev *mddev)
+static inline void mddev_unlock_and_resume(struct mddev *mddev,
+                                          unsigned int noio_flags)
 {
        mddev_unlock(mddev);
-       mddev_resume(mddev);
+       mddev_resume(mddev, noio_flags);
 }
 
 struct mdu_array_info_s;
 struct mdu_disk_info_s;
 
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 7b7546bfa21f..6f8e3a624456 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -693,13 +693,15 @@ static void r5c_disable_writeback_async(struct 
work_struct *work)
                   !READ_ONCE(conf->log) ||
                   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 
        log = READ_ONCE(conf->log);
        if (log) {
-               mddev_suspend(mddev, false);
+               unsigned int noio_flags;
+
+               mddev_suspend(mddev, false, &noio_flags);
                log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
-               mddev_resume(mddev);
+               mddev_resume(mddev, noio_flags);
        }
 }
 
 static void r5l_submit_current_io(struct r5l_log *log)
 {
@@ -2603,10 +2605,11 @@ EXPORT_SYMBOL(r5c_journal_mode_set);
 static ssize_t r5c_journal_mode_store(struct mddev *mddev,
                                      const char *page, size_t length)
 {
        int mode = ARRAY_SIZE(r5c_journal_mode_str);
        size_t len = length;
+       unsigned int noio_flags;
        int ret;
 
        if (len < 2)
                return -EINVAL;
 
@@ -2615,15 +2618,15 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 
        while (mode--)
                if (strlen(r5c_journal_mode_str[mode]) == len &&
                    !strncmp(page, r5c_journal_mode_str[mode], len))
                        break;
-       ret = mddev_suspend_and_lock(mddev);
+       ret = mddev_suspend_and_lock(mddev, &noio_flags);
        if (ret)
                return ret;
        ret = r5c_journal_mode_set(mddev, mode);
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return ret ?: length;
 }
 
 struct md_sysfs_entry
 r5c_journal_mode = __ATTR(journal_mode, 0644,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 65ae7d8930fc..6062c4b62cc8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6992,11 +6992,12 @@ raid5_show_stripe_size(struct mddev  *mddev, char *page)
 #if PAGE_SIZE != DEFAULT_STRIPE_SIZE
 static ssize_t
 raid5_store_stripe_size(struct mddev  *mddev, const char *page, size_t len)
 {
        struct r5conf *conf;
        unsigned long new;
+       unsigned int noio_flags;
        int err;
        int size;
 
        if (len >= PAGE_SIZE)
                return -EINVAL;
@@ -7011,11 +7011,11 @@ raid5_store_stripe_size(struct mddev  *mddev, const char *page, size_t len)
        if (new % DEFAULT_STRIPE_SIZE != 0 ||
                        new > PAGE_SIZE || new == 0 ||
                        new != roundup_pow_of_two(new))
                return -EINVAL;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
 
        conf = mddev->private;
        if (!conf) {
@@ -7049,11 +7049,11 @@ raid5_store_stripe_size(struct mddev  *mddev, const char *page, size_t len)
                err = -ENOMEM;
        }
        mutex_unlock(&conf->cache_size_mutex);
 
 out_unlock:
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return err ?: len;
 }
 
 static struct md_sysfs_entry
 raid5_stripe_size = __ATTR(stripe_size, 0644,
@@ -7127,19 +7127,20 @@ raid5_show_skip_copy(struct mddev *mddev, char *page)
 static ssize_t
 raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
 {
        struct r5conf *conf;
        unsigned long new;
+       unsigned int noio_flags;
        int err;
 
        if (len >= PAGE_SIZE)
                return -EINVAL;
        if (kstrtoul(page, 10, &new))
                return -EINVAL;
        new = !!new;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        conf = mddev->private;
        if (!conf)
                err = -ENODEV;
@@ -7152,11 +7153,11 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
                        lim.features |= BLK_FEAT_STABLE_WRITES;
                else
                        lim.features &= ~BLK_FEAT_STABLE_WRITES;
                err = queue_limits_commit_update(q, &lim);
        }
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
        return err ?: len;
 }
 
 static struct md_sysfs_entry
 raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
@@ -7195,10 +7196,11 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt,
 static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
        struct r5conf *conf;
        unsigned int new;
+       unsigned int noio_flags;
        int err;
        struct r5worker_group *new_groups, *old_groups;
        int group_cnt;
 
        if (len >= PAGE_SIZE)
@@ -7207,16 +7209,16 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
                return -EINVAL;
        /* 8192 should be big enough */
        if (new > 8192)
                return -EINVAL;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        conf = mddev->private;
        if (!conf) {
-               mddev_unlock_and_resume(mddev);
+               mddev_unlock_and_resume(mddev, noio_flags);
                return -ENODEV;
        }
        raid5_quiesce(mddev, true);
 
        if (new != conf->worker_cnt_per_group) {
@@ -7237,11 +7239,11 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
                        kfree(old_groups);
                }
        }
 
        raid5_quiesce(mddev, false);
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
 
        return err ?: len;
 }
 
 static struct md_sysfs_entry
@@ -8940,18 +8942,19 @@ static void *raid6_takeover(struct mddev *mddev)
 }
 
 static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 {
        struct r5conf *conf;
+       unsigned int noio_flags;
        int err;
 
-       err = mddev_suspend_and_lock(mddev);
+       err = mddev_suspend_and_lock(mddev, &noio_flags);
        if (err)
                return err;
        conf = mddev->private;
        if (!conf) {
-               mddev_unlock_and_resume(mddev);
+               mddev_unlock_and_resume(mddev, noio_flags);
                return -ENODEV;
        }
 
        if (strncmp(buf, "ppl", 3) == 0) {
                /* ppl only works with RAID 5 */
@@ -8990,11 +8993,11 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
        }
 
        if (!err)
                md_update_sb(mddev, 1);
 
-       mddev_unlock_and_resume(mddev);
+       mddev_unlock_and_resume(mddev, noio_flags);
 
        return err;
 }
 
 static int raid5_start(struct mddev *mddev)
-- 
2.54.0

Reply via email to