Hi,

On 16 December and 19 December, I posted predecessor patches for
this problem. I've subsequently uncovered problems during testing,
so this is a third version of the patch.

It's not that the patch was buggy: the 3/4ths threshold was okay,
in theory, but it uncovered another problem related to when the
logd daemon thinks (or doesn't think) a journal flush is required.
Function gfs2_ail_flush_reqd uses a journal threshold value of
4/5ths of the journal. If we use a threshold of 3/4ths, we can get
into situations where the logd daemon doesn't think a flush is
necessary even though we may need the log flushed in order to
continue working. By using the same threshold, the log keeps
moving properly in most cases.

This version 3, then, eliminates the value and calculation of the
predecessor in favor of using the same log threshold already used
by gfs2_ail_flush_reqd. This avoids the problem in most cases,
but does not eliminate it entirely. I've written a follow-up patch
for that which I'll post soon.
---
GFS2: Limit number of transaction blocks requested for truncates

This patch limits the number of transaction blocks requested during
file truncates. If we have very large multi-terabyte files, and want
to delete or truncate them, they might span so many resource groups
that we overflow the journal blocks, and cause an assert failure.
By limiting the number of blocks in the transaction, we prevent this
overflow and give other running processes time to do transactions.

The limiting factor I chose is sd_log_thresh2 which is currently
set to 4/5ths of the journal. This same ratio is used in function
gfs2_ail_flush_reqd to determine when a log flush is required.
If we make the maximum value less than this, we can get into an
infinite hang whereby the log stops moving because the number of
used blocks is less than the threshold and the iterative loop
needs more, but since we're under the threshold, the log daemon
never starts any IO on the log.

Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 645721f..cbad0ef 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -720,6 +720,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrp_list rlist;
+       struct gfs2_trans *tr;
        u64 bn, bstart;
        u32 blen, btotal;
        __be64 *p;
@@ -728,6 +729,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        unsigned int revokes = 0;
        int x;
        int error;
+       int jblocks_rqsted;
 
        error = gfs2_rindex_update(sdp);
        if (error)
@@ -791,12 +793,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
                gfs2_rs_deltree(&ip->i_res);
 
-       error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
-                                RES_INDIRECT + RES_STATFS + RES_QUOTA,
-                                revokes);
+restart:
+       jblocks_rqsted = rg_blocks + RES_DINODE +
+               RES_INDIRECT + RES_STATFS + RES_QUOTA +
+               gfs2_struct2blk(sdp, revokes, sizeof(u64));
+       if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
+               jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
+       error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
        if (error)
                goto out_rg_gunlock;
 
+       tr = current->journal_info;
        down_write(&ip->i_rw_mutex);
 
        gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -810,6 +817,16 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                if (!*p)
                        continue;
 
+               /* check for max reasonable journal transaction blocks */
+               if (tr->tr_num_buf_new + RES_STATFS +
+                   RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
+                       if (rg_blocks >= tr->tr_num_buf_new)
+                               rg_blocks -= tr->tr_num_buf_new;
+                       else
+                               rg_blocks = 0;
+                       break;
+               }
+
                bn = be64_to_cpu(*p);
 
                if (bstart + blen == bn)
@@ -827,6 +844,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                *p = 0;
                gfs2_add_inode_blocks(&ip->i_inode, -1);
        }
+       if (p == bottom)
+               rg_blocks = 0;
+
        if (bstart) {
                __gfs2_free_blocks(ip, bstart, blen, metadata);
                btotal += blen;
@@ -844,6 +864,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 
        gfs2_trans_end(sdp);
 
+       if (rg_blocks)
+               goto restart;
+
 out_rg_gunlock:
        gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 out_rlist:

Reply via email to