From: Yu Kuai <yuku...@huawei.com> This flag is used by llbitmap in later patches to skip raid456 initial recover and delay building initial xor data to first write.
Signed-off-by: Yu Kuai <yuku...@huawei.com> --- drivers/md/md.c | 47 +++++++++++++++++++++++++++++++++++++++++++++- drivers/md/md.h | 2 ++ drivers/md/raid5.c | 19 +++++++++++++++---- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7196e7f6b2a4..199843356449 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9199,6 +9199,39 @@ static sector_t md_sync_max_sectors(struct mddev *mddev, } } +/* + * If lazy recovery is requested and all rdevs are in sync, select the rdev with + * the higest index to perfore recovery to build initial xor data, this is the + * same as old bitmap. + */ +static bool mddev_select_lazy_recover_rdev(struct mddev *mddev) +{ + struct md_rdev *recover_rdev = NULL; + struct md_rdev *rdev; + bool ret = false; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev->raid_disk < 0) + continue; + + if (test_bit(Faulty, &rdev->flags) || + !test_bit(In_sync, &rdev->flags)) + break; + + if (!recover_rdev || recover_rdev->raid_disk < rdev->raid_disk) + recover_rdev = rdev; + } + + if (recover_rdev) { + clear_bit(In_sync, &recover_rdev->flags); + ret = true; + } + + rcu_read_unlock(); + return ret; +} + static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) { sector_t start = 0; @@ -9230,6 +9263,14 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) start = rdev->recovery_offset; rcu_read_unlock(); + /* + * If there are no spares, and raid456 lazy initial recover is + * requested. + */ + if (test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery) && + start == MaxSector && mddev_select_lazy_recover_rdev(mddev)) + start = 0; + /* If there is a bitmap, we need to make sure all * writes that started before we added a spare * complete before we start doing a recovery. @@ -9791,6 +9832,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); + clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery); return true; } @@ -9799,6 +9841,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares) remove_spares(mddev, NULL); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); + clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery); return true; } @@ -9808,7 +9851,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares) * re-add. */ *spares = remove_and_add_spares(mddev, NULL); - if (*spares) { + if (*spares || test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery)) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); @@ -10021,6 +10064,7 @@ void md_check_recovery(struct mddev *mddev) } clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); + clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags); @@ -10131,6 +10175,7 @@ void md_reap_sync_thread(struct mddev *mddev) clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery); /* * We call mddev->cluster_ops->update_size here because sync_size could * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared, diff --git a/drivers/md/md.h b/drivers/md/md.h index 4fa5a3e68a0c..7b6357879a84 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -667,6 +667,8 @@ enum recovery_flags { MD_RECOVERY_RESHAPE, /* remote node is running resync thread */ MD_RESYNCING_REMOTE, + /* raid456 lazy initial recover */ + MD_RECOVERY_LAZY_RECOVER, }; enum md_ro_state { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 672ab226e43c..5112658ef5f6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4705,10 +4705,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) } } else if (test_bit(In_sync, &rdev->flags)) set_bit(R5_Insync, &dev->flags); - else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) - /* in sync if before recovery_offset */ - set_bit(R5_Insync, &dev->flags); - else if (test_bit(R5_UPTODATE, &dev->flags) && + else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= + rdev->recovery_offset) { + /* + * in sync if: + * - normal IO, or + * - resync IO that is not lazy recovery + * + * For lazy recovery, we have to mark the rdev without + * In_sync as failed, to build initial xor data. + */ + if (!test_bit(STRIPE_SYNCING, &sh->state) || + !test_bit(MD_RECOVERY_LAZY_RECOVER, + &conf->mddev->recovery)) + set_bit(R5_Insync, &dev->flags); + } else if (test_bit(R5_UPTODATE, &dev->flags) && test_bit(R5_Expanded, &dev->flags)) /* If we've reshaped into here, we assume it is Insync. * We will shortly update recovery_offset to make -- 2.39.2