Re: [Cluster-devel] [GFS2 PATCH] gfs2: make recovery workqueue operate on a gfs2 mount point, not journal

Andreas Gruenbacher Tue, 22 Dec 2020 15:28:35 -0800

On Tue, Dec 22, 2020 at 9:39 PM Bob Peterson <[email protected]> wrote:
>
> Hi,
>
> Before this patch, journal recovery was done by a workqueue function that
> operated on a per-journal basis. The problem is, these could run 
> simultaneously
> which meant that they could all use the same bio, sd_log_bio, to do their
> writing to all the various journals. These operations overwrote one another
> eventually causing memory corruption.
>
> This patch makes the recovery workqueue operate on a per-superblock basis,
> which means a mount point using, for example journal0, could do recovery
> for all journals that need recovery. This is done consecutively so the
> sd_log_bio is only referenced by one recovery at a time, thus avoiding the
> chaos.
>
> Since the journal recovery requests can come in any order, and unpredictably,
> the new work func loops until there are no more journals to be recovered.
>
> Since multiple processes may request recovery of a journal, and since they
> all now use the same sdp-based workqueue, it's okay for them to get an
> error from queue_work: Queueing work while there's already work queued.
>
> Signed-off-by: Bob Peterson <[email protected]>
> ---
>  fs/gfs2/incore.h     |  2 +-
>  fs/gfs2/ops_fstype.c |  2 +-
>  fs/gfs2/recovery.c   | 32 ++++++++++++++++++++++++++++----
>  3 files changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 8e1ab8ed4abc..b393cbf9efeb 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -529,7 +529,6 @@ struct gfs2_jdesc {
>         struct list_head jd_list;
>         struct list_head extent_list;
>         unsigned int nr_extents;
> -       struct work_struct jd_work;
>         struct inode *jd_inode;
>         unsigned long jd_flags;
>  #define JDF_RECOVERY 1
> @@ -746,6 +745,7 @@ struct gfs2_sbd {
>         struct completion sd_locking_init;
>         struct completion sd_wdack;
>         struct delayed_work sd_control_work;
> +       struct work_struct sd_recovery_work;
>
>         /* Inode Stuff */
>
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index 61fce59cb4d3..3d9a6d6d42cb 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -93,6 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
>         init_completion(&sdp->sd_locking_init);
>         init_completion(&sdp->sd_wdack);
>         spin_lock_init(&sdp->sd_statfs_spin);
> +       INIT_WORK(&sdp->sd_recovery_work, gfs2_recover_func);
>
>         spin_lock_init(&sdp->sd_rindex_spin);
>         sdp->sd_rindex_tree.rb_node = NULL;
> @@ -586,7 +587,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct 
> gfs2_holder *ji_gh)
>                 INIT_LIST_HEAD(&jd->extent_list);
>                 INIT_LIST_HEAD(&jd->jd_revoke_list);
>
> -               INIT_WORK(&jd->jd_work, gfs2_recover_func);
>                 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
>                 if (IS_ERR_OR_NULL(jd->jd_inode)) {
>                         if (!jd->jd_inode)
> diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
> index c26c68ebd29d..cd3e66cdb560 100644
> --- a/fs/gfs2/recovery.c
> +++ b/fs/gfs2/recovery.c
> @@ -399,9 +399,8 @@ static void recover_local_statfs(struct gfs2_jdesc *jd,
>         return;
>  }
>
> -void gfs2_recover_func(struct work_struct *work)
> +static void gfs2_recover_one(struct gfs2_jdesc *jd)
>  {
> -       struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, 
> jd_work);
>         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
>         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
>         struct gfs2_log_header_host head;
> @@ -562,16 +561,41 @@ void gfs2_recover_func(struct work_struct *work)
>         wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
>  }
>
> +void gfs2_recover_func(struct work_struct *work)
> +{
> +       struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd,
> +                                           sd_recovery_work);
> +       struct gfs2_jdesc *jd;
> +       int count, recovered = 0;
> +
> +       do {
> +               count = 0;
> +               spin_lock(&sdp->sd_jindex_spin);
> +               list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
> +                       if (test_bit(JDF_RECOVERY, &jd->jd_flags)) {
> +                               spin_unlock(&sdp->sd_jindex_spin);
> +                               gfs2_recover_one(jd);
> +                               spin_lock(&sdp->sd_jindex_spin);
> +                               count++;
> +                               recovered++;
> +                       }
> +               }
> +               spin_unlock(&sdp->sd_jindex_spin);
> +       } while (count);


Can't this be a simple loop like below?

repeat:
        spin_lock(&sdp->sd_jindex_spin);
        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                if (test_bit(JDF_RECOVERY, &jd->jd_flags)) {
                        spin_unlock(&sdp->sd_jindex_spin);
                        gfs2_recover_one(jd);
                        goto repeat;
                }
        }
        spin_unlock(&sdp->sd_jindex_spin);

> +       if (recovered > 1)
> +               fs_err(sdp, "Journals recovered: %d\n", recovered);
> +}
> +
>  int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
>  {
> +       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
>         int rv;
>
>         if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
>                 return -EBUSY;
>
>         /* we have JDF_RECOVERY, queue should always succeed */
> -       rv = queue_work(gfs_recovery_wq, &jd->jd_work);
> -       BUG_ON(!rv);
> +       rv = queue_work(gfs_recovery_wq, &sdp->sd_recovery_work);
>
>         if (wait)
>                 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,

Thanks,
Andreas

Re: [Cluster-devel] [GFS2 PATCH] gfs2: make recovery workqueue operate on a gfs2 mount point, not journal

Reply via email to