Pass 1 in fsck reads all the inodes in disk block order, based upon the resource group (rgrp) bitmap. This patch adds a function that creates a list of the blocks of a given type from the rgrp bitmap. Using this list, it is then possible to do readahead on the inode blocks.
This gives me around a 25% improvement in overall fsck time for my 500G
test filesystem. It is also a nice cleanup, since it splits the pass1
function into more easily understood components.

Signed-off-by: Steven Whitehouse <[email protected]>
---
Review fixes folded into this version:
 - pass1_process_rgrp() checks the malloc() result and no longer leaks
   ibuf when pass1_process_bitmap() fails.
 - The readahead window now slides as blocks are consumed; previously only
   the first RA_WINDOW blocks of a bitmap ever had readahead issued.
 - An abort request stops pass1 straight away again, as it did before the
   split, instead of walking the remaining bitmaps and resource groups.
 - Comments added to the new functions.

diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 540f2a9..4c67a83 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -14,6 +14,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <time.h>
+#include <fcntl.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
 #include <libintl.h>
@@ -1547,6 +1548,175 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 	return 0;
 }
 
+#define RA_WINDOW 64
+
+/*
+ * Issue readahead for entries cur_window .. min(n, RA_WINDOW) - 1 of ibuf.
+ * Entries below cur_window are taken to have readahead issued already.
+ * Returns the number of leading ibuf entries covered once this call is
+ * done; posix_fadvise() is advisory, so its result is not checked.
+ */
+static unsigned pass1_readahead(struct gfs2_sbd *sdp, uint64_t *ibuf, unsigned n, unsigned cur_window)
+{
+	unsigned i;
+	unsigned thresh = (n < RA_WINDOW) ? n : RA_WINDOW;
+
+	for (i = cur_window; i < thresh; i++)
+		posix_fadvise(sdp->device_fd, ibuf[i]*sdp->bsize, sdp->bsize, POSIX_FADV_WILLNEED);
+
+	return i;
+}
+
+/*
+ * Check each of the n blocks listed in ibuf, keeping readahead going ahead
+ * of the block currently being checked.
+ * Returns FSCK_ERROR on failure, FSCK_OK when stopping early because
+ * fsck_abort is set, and 0 once every block has been handled.
+ */
+static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n)
+{
+	struct gfs2_buffer_head *bh;
+	unsigned i;
+	unsigned ra_window = 0;
+	uint64_t block;
+
+	for (i = 0; i < n; i++) {
+		block = ibuf[i];
+
+		/* ra_window is the number of entries, counted from
+		   ibuf[i], that already have readahead issued. Top it
+		   up once it drops below half of RA_WINDOW. */
+		if (ra_window < RA_WINDOW/2)
+			ra_window = pass1_readahead(sdp, ibuf + i, n - i, ra_window);
+		/* ibuf[i] is consumed by this iteration, so the window
+		   slides forward by one entry. */
+		ra_window--;
+
+		/* skip gfs1 rindex indirect blocks */
+		if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
+			log_debug(_("Skipping rindex indir block "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		warm_fuzzy_stuff(block);
+
+		if (fsck_abort) { /* if asked to abort */
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_OK;
+		}
+		if (skip_this_pass) {
+			printf( _("Skipping pass 1 is not a good idea.\n"));
+			skip_this_pass = FALSE;
+			fflush(stdout);
+		}
+		if (fsck_system_inode(sdp, block)) {
+			log_debug(_("Already processed system inode "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		bh = bread(sdp, block);
+
+		if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+			/* In gfs2, a bitmap mark of 2 means an inode,
+			   but in gfs1 it means any metadata. So if
+			   this is gfs1 and not an inode, it may be
+			   okay. If it's non-dinode metadata, it will
+			   be referenced by an inode, so we need to
+			   skip it here and it will be sorted out
+			   when the referencing inode is checked. */
+			if (sdp->gfs1) {
+				uint32_t check_magic;
+
+				check_magic = ((struct gfs2_meta_header *)
+					       (bh->b_data))->mh_magic;
+				if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
+					log_debug( _("Deferring GFS1 "
+						     "metadata block #"
+						     "%" PRIu64" (0x%"
+						     PRIx64 ")\n"),
+						   block, block);
+					brelse(bh);
+					continue;
+				}
+			}
+			log_err( _("Found invalid inode at block #"
+				   "%llu (0x%llx)\n"),
+				(unsigned long long)block,
+				(unsigned long long)block);
+			if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
+				stack;
+				brelse(bh);
+				gfs2_special_free(&gfs1_rindex_blks);
+				return FSCK_ERROR;
+			}
+			check_n_fix_bitmap(sdp, block, gfs2_block_free);
+		} else if (handle_di(sdp, bh) < 0) {
+			stack;
+			brelse(bh);
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_ERROR;
+		}
+		/* Ignore everything else - they should be hit by the
+		   handle_di step. Don't check NONE either, because
+		   check_meta passes everything if GFS2_METATYPE_NONE
+		   is specified. Hopefully, other metadata types such
+		   as indirect blocks will be handled when the inode
+		   itself is processed, and if it's not, it should be
+		   caught in pass5. */
+		brelse(bh);
+	}
+
+	return 0;
+}
+
+/*
+ * Run pass1 over one resource group: check the blocks each bitmap marks as
+ * dinodes and, for gfs1, record the GFS2_BLKST_UNLINKED ("free meta") blocks.
+ * Stops early on an error or once fsck_abort is set; the scratch list ibuf
+ * is freed on the way out. Returns 0 on success, else the failing result.
+ */
+static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
+{
+	unsigned k, n, i;
+	uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));
+	int ret = 0;
+
+	if (ibuf == NULL) {
+		log_err( _("Out of memory in pass1\n"));
+		gfs2_special_free(&gfs1_rindex_blks);
+		return FSCK_ERROR;
+	}
+
+	for (k = 0; k < rgd->ri.ri_length; k++) {
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
+
+		if (n) {
+			ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
+			if (ret || fsck_abort)
+				break;
+		}
+
+		/*
+		  For GFS1, we have to count the "free meta" blocks in the
+		  resource group and mark them specially so we can count them
+		  properly in pass5.
+		*/
+		if (!sdp->gfs1)
+			continue;
+
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
+		for (i = 0; i < n; i++)
+			gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
+	}
+
+	free(ibuf);
+	return ret;
+}
+
 /**
  * pass1 - walk through inodes and check inode state
  *
@@ -1563,12 +1733,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 int pass1(struct gfs2_sbd *sdp)
 {
 	struct osi_node *n, *next = NULL;
-	struct gfs2_buffer_head *bh;
-	uint64_t block = 0;
 	struct rgrp_tree *rgd;
-	int first;
 	uint64_t i;
 	uint64_t rg_count = 0;
+	int ret;
 
 	osi_list_init(&gfs1_rindex_blks.list);
 
@@ -1611,115 +1779,11 @@ int pass1(struct gfs2_sbd *sdp)
 						  gfs2_meta_rgrp);*/
 		}
 
-		first = 1;
+		ret = pass1_process_rgrp(sdp, rgd);
+		/* Also stop on abort; no error is reported for that case */
+		if (ret || fsck_abort)
+			return ret;
 
-		while (1) {
-			/* "block" is relative to the entire file system */
-			/* Get the next dinode in the file system, according
-			   to the bitmap. This should ONLY be dinodes unless
-			   it's GFS1, in which case it can be any metadata. */
-			if (gfs2_next_rg_meta(rgd, &block, first))
-				break;
-			/* skip gfs1 rindex indirect blocks */
-			if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
-				log_debug(_("Skipping rindex indir block "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			warm_fuzzy_stuff(block);
-
-			if (fsck_abort) { /* if asked to abort */
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_OK;
-			}
-			if (skip_this_pass) {
-				printf( _("Skipping pass 1 is not a good idea.\n"));
-				skip_this_pass = FALSE;
-				fflush(stdout);
-			}
-			if (fsck_system_inode(sdp, block)) {
-				log_debug(_("Already processed system inode "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			bh = bread(sdp, block);
-
-			/*log_debug( _("Checking metadata block #%" PRIu64
-			  " (0x%" PRIx64 ")\n"), block, block);*/
-
-			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
-				/* In gfs2, a bitmap mark of 2 means an inode,
-				   but in gfs1 it means any metadata. So if
-				   this is gfs1 and not an inode, it may be
-				   okay. If it's non-dinode metadata, it will
-				   be referenced by an inode, so we need to
-				   skip it here and it will be sorted out
-				   when the referencing inode is checked. */
-				if (sdp->gfs1) {
-					uint32_t check_magic;
-
-					check_magic = ((struct
-							gfs2_meta_header *)
-						       (bh->b_data))->mh_magic;
-					if (be32_to_cpu(check_magic) ==
-					    GFS2_MAGIC) {
-						log_debug( _("Deferring GFS1 "
-							     "metadata block #"
-							     "%" PRIu64" (0x%"
-							     PRIx64 ")\n"),
-							   block, block);
-						brelse(bh);
-						first = 0;
-						continue;
-					}
-				}
-				log_err( _("Found invalid inode at block #"
-					   "%llu (0x%llx)\n"),
-					(unsigned long long)block,
-					(unsigned long long)block);
-				if (gfs2_blockmap_set(bl, block,
-						      gfs2_block_free)) {
-					stack;
-					brelse(bh);
-					gfs2_special_free(&gfs1_rindex_blks);
-					return FSCK_ERROR;
-				}
-				check_n_fix_bitmap(sdp, block,
-						   gfs2_block_free);
-			} else if (handle_di(sdp, bh) < 0) {
-				stack;
-				brelse(bh);
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_ERROR;
-			}
-			/* Ignore everything else - they should be hit by the
-			   handle_di step. Don't check NONE either, because
-			   check_meta passes everything if GFS2_METATYPE_NONE
-			   is specified. Hopefully, other metadata types such
-			   as indirect blocks will be handled when the inode
-			   itself is processed, and if it's not, it should be
-			   caught in pass5. */
-			brelse(bh);
-			first = 0;
-		}
-		/*
-		  For GFS1, we have to count the "free meta" blocks in the
-		  resource group and mark them specially so we can count them
-		  properly in pass5.
-		*/
-		if (!sdp->gfs1)
-			continue;
-		first = 1;
-		while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
-			gfs2_blockmap_set(bl, block, gfs2_freemeta);
-			first = 0;
-		}
 	}
 	gfs2_special_free(&gfs1_rindex_blks);
 	return FSCK_OK;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 46d4d67..db31a6c 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
 extern int do_init_inum(struct gfs2_sbd *sdp);
 extern int do_init_statfs(struct gfs2_sbd *sdp);
 extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
+extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
+			      uint64_t *buf, uint8_t state);
 extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
 			     int first);
 extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
 				 uint64_t *block, uint32_t type, int first);
-extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
-				 int first);
 
 /* super.c */
 extern int check_sb(struct gfs2_sb *sb);
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 645c45a..6981072 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -495,6 +495,35 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
 	return 0;
 }
 
+/**
+ * lgfs2_bm_scan - collect the blocks of one bitmap that are in a given state
+ * @rgd: the resource group to scan
+ * @idx: index of the bitmap within @rgd
+ * @buf: array that receives the matching block numbers; it must have
+ *       room for bi_len * GFS2_NBBY entries, the number of blocks the
+ *       bitmap describes
+ * @state: the bitmap state to look for (e.g. GFS2_BLKST_DINODE)
+ *
+ * Returns: the number of block numbers stored in @buf
+ */
+unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state)
+{
+	struct gfs2_bitmap *bi = &rgd->bits[idx];
+	unsigned n = 0;
+	uint32_t blk = 0;
+
+	while(blk < (bi->bi_len * GFS2_NBBY)) {
+		blk = gfs2_bitfit(rgd->bh[idx]->b_data + bi->bi_offset,
+				  bi->bi_len, blk, state);
+		if (blk == BFITNOENT)
+			break;
+		buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0;
+		blk++;
+	}
+
+	return n;
+}
+
 /**
  * gfs2_next_rg_meta
  * @rgd:
@@ -545,11 +574,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first)
 	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
 }
 
-int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
-{
-	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
-}
-
 /**
  * next_rg_metatype
  * @rgd:
