Hi Bob,
On Tue, May 26, 2020 at 3:05 PM Bob Peterson <[email protected]> wrote:
> This adds checks for gfs2_log_flush being stuck, similarly to the check
> in gfs2_ail1_flush.
>
> Signed-off-by: Bob Peterson <[email protected]>
> ---
> fs/gfs2/log.c | 14 +++++++++++---
> 1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 1d51b4781bdd..636c82dda68b 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -145,9 +145,6 @@ static void dump_ail_list(struct gfs2_sbd *sdp)
> struct gfs2_bufdata *bd;
> struct buffer_head *bh;
>
> - fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
> - current->journal_info ? 1 : 0);
> -
> list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
> list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
> bd_ail_st_list) {
> @@ -197,6 +194,8 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
> restart:
> ret = 0;
> if (time_after(jiffies, flush_start + (HZ * 600))) {
> + fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! "
> + "t=%d\n", current->journal_info ? 1 : 0);
> dump_ail_list(sdp);
> goto out;
> }
> @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
>
> if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
> if (!sdp->sd_log_idle) {
> + unsigned long start = jiffies;
> +
> for (;;) {
> + if (time_after(jiffies, start + (HZ * 600))) {
This should probably have some rate limiting as well, for example by
resetting the start time here:

    start = jiffies;

I'm not sure what provides similar rate limiting in gfs2_ail1_flush.
> + fs_err(sdp, "Error: In gfs2_log_flush "
> + "for 10 minutes! t=%d\n",
> + current->journal_info ? 1 : 0);
Please don't break the format string up like that; this makes grepping harder.
> + dump_ail_list(sdp);
> + break;
> + }
> gfs2_ail1_start(sdp);
> gfs2_ail1_wait(sdp);
> if (gfs2_ail1_empty(sdp, 0))
> --
> 2.26.2
>