When fsck.gfs2 discovers a data block in error, it flags the error and, especially in pass1, tries to "undo" the block designations it previously marked in the blockmap. Before this patch, the "undo" functions didn't know when to stop, so they could "undo" designations in the blockmap that had never been "done" in the first place. With this patch, if an error is encountered while processing data blocks (not counting duplicate references -- for example, blocks marked as 'data' that are really dinodes which it hasn't gotten to yet), it saves off the block where the error occurred. Later, during the "undo" processing, it stops when it reaches the block that flagged the error.
rhbz#902920 --- gfs2/fsck/metawalk.c | 36 +++++++++++++++++++++++++++--------- gfs2/fsck/pass1.c | 2 +- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c index 923a140..4a2dd50 100644 --- a/gfs2/fsck/metawalk.c +++ b/gfs2/fsck/metawalk.c @@ -1325,7 +1325,7 @@ static int build_and_check_metalist(struct gfs2_inode *ip, osi_list_t *mlp, */ static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass, struct gfs2_buffer_head *bh, int head_size, - uint64_t *blks_checked) + uint64_t *blks_checked, uint64_t *error_blk) { int error = 0, rc = 0; uint64_t block, *ptr; @@ -1349,8 +1349,13 @@ static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass, rc = pass->check_data(ip, metablock, block, pass->private); if (!error && rc) { error = rc; - log_info(_("\nUnrecoverable data block error %d on " - "block %llu (0x%llx).\n"), rc, + log_info("\n"); + if (rc < 0) { + *error_blk = block; + log_info(_("Unrecoverable ")); + } + log_info(_("data block error %d on block %llu " + "(0x%llx).\n"), rc, (unsigned long long)block, (unsigned long long)block); } @@ -1362,7 +1367,8 @@ static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass, } static int undo_check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass, - uint64_t *ptr_start, char *ptr_end) + uint64_t *ptr_start, char *ptr_end, + uint64_t error_blk) { int rc = 0; uint64_t block, *ptr; @@ -1375,6 +1381,8 @@ static int undo_check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass, if (skip_this_pass || fsck_abort) return 1; block = be64_to_cpu(*ptr); + if (block == error_blk) + return 1; rc = pass->undo_check_data(ip, block, pass->private); if (rc < 0) return rc; @@ -1415,6 +1423,8 @@ int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass) uint64_t blks_checked = 0; int error, rc; int metadata_clean = 0; + uint64_t error_blk = 0; + int hit_error_blk = 0; if (!height && !is_dir(&ip->i_di, ip->i_sbd->gfs1)) return 0; 
@@ -1460,7 +1470,7 @@ int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass) if (pass->check_data) rc = check_data(ip, pass, bh, head_size, - &blks_checked); + &blks_checked, &error_blk); else rc = 0; @@ -1505,12 +1515,20 @@ undo_metalist: i, pass->private); else rc = 0; - if (metadata_clean && rc == 0 && i == height - 1) { + if (metadata_clean && rc == 0 && i == height - 1 && + !hit_error_blk) { head_size = hdr_size(bh, height); - if (head_size) - undo_check_data(ip, pass, (uint64_t *) + if (head_size) { + rc = undo_check_data(ip, pass, + (uint64_t *) (bh->b_data + head_size), - (bh->b_data + ip->i_sbd->bsize)); + (bh->b_data + ip->i_sbd->bsize), + error_blk); + if (rc > 0) { + hit_error_blk = 1; + rc = 0; + } + } } if (bh == ip->i_bh) osi_list_del(&bh->b_altlist); diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c index ee828d8..2c1c046 100644 --- a/gfs2/fsck/pass1.c +++ b/gfs2/fsck/pass1.c @@ -462,7 +462,7 @@ static int check_data(struct gfs2_inode *ip, uint64_t metablock, fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("bad (out of range) data"), gfs2_bad_block); - return 1; + return -1; } bc->data_count++; /* keep the count sane anyway */ q = block_type(block); -- 1.7.11.7