On Tue, Dec 22, 2020 at 9:39 PM Bob Peterson <[email protected]> wrote:
>
> Hi,
>
> Before this patch, journal recovery was done by a workqueue function that
> operated on a per-journal basis. The problem is, these could run
> simultaneously
> which meant that they could all use the same bio, sd_log_bio, to do their
> writing to all the various journals. These operations overwrote one another
> eventually causing memory corruption.
>
> This patch makes the recovery workqueue operate on a per-superblock basis,
> which means a mount point using, for example journal0, could do recovery
> for all journals that need recovery. This is done consecutively so the
> sd_log_bio is only referenced by one recovery at a time, thus avoiding the
> chaos.
>
> Since the journal recovery requests can come in any order, and unpredictably,
> the new work func loops until there are no more journals to be recovered.
>
> Since multiple processes may request recovery of a journal, and since they
> all now use the same sdp-based workqueue, it's okay for them to get an
> error from queue_work: Queueing work while there's already work queued.
>
> Signed-off-by: Bob Peterson <[email protected]>
> ---
> fs/gfs2/incore.h | 2 +-
> fs/gfs2/ops_fstype.c | 2 +-
> fs/gfs2/recovery.c | 32 ++++++++++++++++++++++++++++----
> 3 files changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 8e1ab8ed4abc..b393cbf9efeb 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -529,7 +529,6 @@ struct gfs2_jdesc {
> struct list_head jd_list;
> struct list_head extent_list;
> unsigned int nr_extents;
> - struct work_struct jd_work;
> struct inode *jd_inode;
> unsigned long jd_flags;
> #define JDF_RECOVERY 1
> @@ -746,6 +745,7 @@ struct gfs2_sbd {
> struct completion sd_locking_init;
> struct completion sd_wdack;
> struct delayed_work sd_control_work;
> + struct work_struct sd_recovery_work;
>
> /* Inode Stuff */
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index 61fce59cb4d3..3d9a6d6d42cb 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -93,6 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
> init_completion(&sdp->sd_locking_init);
> init_completion(&sdp->sd_wdack);
> spin_lock_init(&sdp->sd_statfs_spin);
> + INIT_WORK(&sdp->sd_recovery_work, gfs2_recover_func);
>
> spin_lock_init(&sdp->sd_rindex_spin);
> sdp->sd_rindex_tree.rb_node = NULL;
> @@ -586,7 +587,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct
> gfs2_holder *ji_gh)
> INIT_LIST_HEAD(&jd->extent_list);
> INIT_LIST_HEAD(&jd->jd_revoke_list);
>
> - INIT_WORK(&jd->jd_work, gfs2_recover_func);
> jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
> if (IS_ERR_OR_NULL(jd->jd_inode)) {
> if (!jd->jd_inode)
> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
> index c26c68ebd29d..cd3e66cdb560 100644
> --- a/fs/gfs2/recovery.c
> +++ b/fs/gfs2/recovery.c
> @@ -399,9 +399,8 @@ static void recover_local_statfs(struct gfs2_jdesc *jd,
> return;
> }
>
> -void gfs2_recover_func(struct work_struct *work)
> +static void gfs2_recover_one(struct gfs2_jdesc *jd)
> {
> - struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc,
> jd_work);
> struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
> struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> struct gfs2_log_header_host head;
> @@ -562,16 +561,41 @@ void gfs2_recover_func(struct work_struct *work)
> wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
> }
>
> +void gfs2_recover_func(struct work_struct *work)
> +{
> + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd,
> + sd_recovery_work);
> + struct gfs2_jdesc *jd;
> + int count, recovered = 0;
> +
> + do {
> + count = 0;
> + spin_lock(&sdp->sd_jindex_spin);
> + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
> + if (test_bit(JDF_RECOVERY, &jd->jd_flags)) {
> + spin_unlock(&sdp->sd_jindex_spin);
> + gfs2_recover_one(jd);
> + spin_lock(&sdp->sd_jindex_spin);
> + count++;
> + recovered++;
> + }
> + }
> + spin_unlock(&sdp->sd_jindex_spin);
> + } while (count);
Can't this be a simple loop like below?
repeat:
spin_lock(&sdp->sd_jindex_spin);
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
if (test_bit(JDF_RECOVERY, &jd->jd_flags)) {
spin_unlock(&sdp->sd_jindex_spin);
gfs2_recover_one(jd);
goto repeat;
}
}
spin_unlock(&sdp->sd_jindex_spin);
> + if (recovered > 1)
> + fs_err(sdp, "Journals recovered: %d\n", recovered);
> +}
> +
> int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
> {
> + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> int rv;
>
> if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
> return -EBUSY;
>
> /* we have JDF_RECOVERY, queue should always succeed */
> - rv = queue_work(gfs_recovery_wq, &jd->jd_work);
> - BUG_ON(!rv);
> + rv = queue_work(gfs_recovery_wq, &sdp->sd_recovery_work);
>
> if (wait)
> wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
Thanks,
Andreas