Pass 1 in fsck reads all the inodes, in disk block order based upon the
rgrp bitmap. This patch adds a function to create a list of blocks of a
certain type from the rgrp bitmap. Using this list it is then possible
to do readahead on the inode blocks.

This gives me an improvement of around 25% in overall fsck time for my
500G test filesystem.

It is also a nice cleanup, since it splits the pass1 function into more
easily understood components.

Signed-off-by: Steven Whitehouse <[email protected]>

diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 540f2a9..4c67a83 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -14,6 +14,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <time.h>
+#include <fcntl.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
 #include <libintl.h>
@@ -1547,6 +1548,145 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
        return 0;
 }
 
+#define RA_WINDOW 64
+
+/**
+ * pass1_readahead - advise the kernel of blocks that will be read soon
+ * @sdp: the superblock data; supplies the device fd and the block size
+ * @ibuf: list of block addresses
+ * @n: number of entries available in @ibuf
+ * @cur_window: index in @ibuf at which to start issuing advice
+ *
+ * Issues POSIX_FADV_WILLNEED for the entries from @cur_window up to, but
+ * not including, the smaller of @n and RA_WINDOW.  The result of
+ * posix_fadvise() is not checked.
+ *
+ * Returns: the index one past the last entry advised, or @cur_window
+ * unchanged if it was already at or beyond that limit.
+ */
+static unsigned pass1_readahead(struct gfs2_sbd *sdp, uint64_t *ibuf,
+                                unsigned n, unsigned cur_window)
+{
+       unsigned limit = RA_WINDOW;
+       unsigned pos = cur_window;
+
+       if (n < limit)
+               limit = n;
+
+       while (pos < limit) {
+               posix_fadvise(sdp->device_fd, ibuf[pos]*sdp->bsize,
+                             sdp->bsize, POSIX_FADV_WILLNEED);
+               pos++;
+       }
+
+       return pos;
+}
+
+/**
+ * pass1_process_bitmap - check each block in a list of candidate dinodes
+ * @sdp: the superblock data
+ * @rgd: resource group passed in by the caller; not used by this function
+ * @ibuf: block addresses to process, in the order given
+ * @n: number of entries in @ibuf
+ *
+ * Readahead is issued as a sliding window: ra_window counts how many
+ * entries, starting from the current one, have already been advised.  It
+ * is topped up towards RA_WINDOW whenever it drops below half of that and
+ * shrinks by one for every entry consumed.
+ *
+ * Returns: 0 when the whole list has been processed, FSCK_OK when an abort
+ * was requested, or FSCK_ERROR when the blockmap could not be updated or
+ * handle_di() failed.  gfs1_rindex_blks is freed before the FSCK_OK and
+ * FSCK_ERROR returns.
+ */
+static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
+                                uint64_t *ibuf, unsigned n)
+{
+       struct gfs2_buffer_head *bh;
+       unsigned i;
+       unsigned ra_window = 0;
+       uint64_t block;
+
+       for (i = 0; i < n; i++) {
+               block = ibuf[i];
+
+               if (ra_window < RA_WINDOW/2)
+                       ra_window = pass1_readahead(sdp, ibuf + i, n - i,
+                                                   ra_window);
+               /* The current entry is consumed on every path below,
+                  including the "continue"s, so take it out of the
+                  window now.  Without this the window never drops
+                  below the threshold again and only the first
+                  RA_WINDOW entries ever get readahead. */
+               if (ra_window)
+                       ra_window--;
+
+               /* skip gfs1 rindex indirect blocks */
+               if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
+                       log_debug(_("Skipping rindex indir block "
+                                   "%lld (0x%llx)\n"),
+                                 (unsigned long long)block,
+                                 (unsigned long long)block);
+                       continue;
+               }
+               warm_fuzzy_stuff(block);
+
+               if (fsck_abort) { /* if asked to abort */
+                       gfs2_special_free(&gfs1_rindex_blks);
+                       return FSCK_OK;
+               }
+               if (skip_this_pass) {
+                       printf( _("Skipping pass 1 is not a good idea.\n"));
+                       skip_this_pass = FALSE;
+                       fflush(stdout);
+               }
+               if (fsck_system_inode(sdp, block)) {
+                       log_debug(_("Already processed system inode "
+                                   "%lld (0x%llx)\n"),
+                                 (unsigned long long)block,
+                                 (unsigned long long)block);
+                       continue;
+               }
+               bh = bread(sdp, block);
+
+               if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+                       /* In gfs2, a bitmap mark of 2 means an inode,
+                          but in gfs1 it means any metadata.  So if
+                          this is gfs1 and not an inode, it may be
+                          okay.  If it's non-dinode metadata, it will
+                          be referenced by an inode, so we need to
+                          skip it here and it will be sorted out
+                          when the referencing inode is checked. */
+                       if (sdp->gfs1) {
+                               uint32_t check_magic;
+
+                               check_magic = ((struct gfs2_meta_header *)
+                                              (bh->b_data))->mh_magic;
+                               if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
+                                       log_debug( _("Deferring GFS1 "
+                                                    "metadata block #"
+                                                    "%" PRIu64" (0x%"
+                                                    PRIx64 ")\n"),
+                                                  block, block);
+                                       brelse(bh);
+                                       continue;
+                               }
+                       }
+                       log_err( _("Found invalid inode at block #"
+                                  "%llu (0x%llx)\n"),
+                                (unsigned long long)block,
+                                (unsigned long long)block);
+                       if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
+                               stack;
+                               brelse(bh);
+                               gfs2_special_free(&gfs1_rindex_blks);
+                               return FSCK_ERROR;
+                       }
+                       check_n_fix_bitmap(sdp, block, gfs2_block_free);
+               } else if (handle_di(sdp, bh) < 0) {
+                       stack;
+                       brelse(bh);
+                       gfs2_special_free(&gfs1_rindex_blks);
+                       return FSCK_ERROR;
+               }
+               /* Ignore everything else - they should be hit by the
+                  handle_di step.  Don't check NONE either, because
+                  check_meta passes everything if GFS2_METATYPE_NONE
+                  is specified.  Hopefully, other metadata types such
+                  as indirect blocks will be handled when the inode
+                  itself is processed, and if it's not, it should be
+                  caught in pass5. */
+               brelse(bh);
+       }
+
+       return 0;
+}
+
+/**
+ * pass1_process_rgrp - run the pass1 checks over one resource group
+ * @sdp: the superblock data
+ * @rgd: the resource group to scan
+ *
+ * For each of the rgrp's ri_length bitmaps, collects the blocks marked
+ * GFS2_BLKST_DINODE and hands them to pass1_process_bitmap().  On GFS1 the
+ * blocks marked GFS2_BLKST_UNLINKED are additionally recorded in the
+ * blockmap as gfs2_freemeta.
+ *
+ * Returns: 0 on success, FSCK_ERROR if the scratch buffer cannot be
+ * allocated, otherwise the non-zero value from pass1_process_bitmap().
+ */
+static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
+{
+       unsigned k, n, i;
+       /* Scratch list shared by both scans.  It holds bsize * GFS2_NBBY
+          entries, which is assumed to be at least as many as
+          lgfs2_bm_scan() can return for a single bitmap. */
+       uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));
+       int ret = 0;
+
+       if (ibuf == NULL)
+               return FSCK_ERROR;
+
+       for (k = 0; k < rgd->ri.ri_length; k++) {
+               n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
+
+               if (n) {
+                       ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
+                       /* Leave through the common exit so that ibuf
+                          is released on failure as well. */
+                       if (ret)
+                               goto out;
+               }
+
+               /*
+                 For GFS1, we have to count the "free meta" blocks in the
+                 resource group and mark them specially so we can count them
+                 properly in pass5.
+                */
+               if (!sdp->gfs1)
+                       continue;
+
+               n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
+               for (i = 0; i < n; i++)
+                       gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
+       }
+
+out:
+       free(ibuf);
+       return ret;
+}
+
 /**
  * pass1 - walk through inodes and check inode state
  *
@@ -1563,12 +1703,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 int pass1(struct gfs2_sbd *sdp)
 {
        struct osi_node *n, *next = NULL;
-       struct gfs2_buffer_head *bh;
-       uint64_t block = 0;
        struct rgrp_tree *rgd;
-       int first;
        uint64_t i;
        uint64_t rg_count = 0;
+       int ret;
 
        osi_list_init(&gfs1_rindex_blks.list);
 
@@ -1611,115 +1749,10 @@ int pass1(struct gfs2_sbd *sdp)
                        gfs2_meta_rgrp);*/
                }
 
-               first = 1;
+               ret = pass1_process_rgrp(sdp, rgd);
+               if (ret)
+                       return ret;
 
-               while (1) {
-                       /* "block" is relative to the entire file system */
-                       /* Get the next dinode in the file system, according
-                          to the bitmap.  This should ONLY be dinodes unless
-                          it's GFS1, in which case it can be any metadata. */
-                       if (gfs2_next_rg_meta(rgd, &block, first))
-                               break;
-                       /* skip gfs1 rindex indirect blocks */
-                       if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
-                               log_debug(_("Skipping rindex indir block "
-                                           "%lld (0x%llx)\n"),
-                                         (unsigned long long)block,
-                                         (unsigned long long)block);
-                               first = 0;
-                               continue;
-                       }
-                       warm_fuzzy_stuff(block);
-
-                       if (fsck_abort) { /* if asked to abort */
-                               gfs2_special_free(&gfs1_rindex_blks);
-                               return FSCK_OK;
-                       }
-                       if (skip_this_pass) {
-                               printf( _("Skipping pass 1 is not a good 
idea.\n"));
-                               skip_this_pass = FALSE;
-                               fflush(stdout);
-                       }
-                       if (fsck_system_inode(sdp, block)) {
-                               log_debug(_("Already processed system inode "
-                                           "%lld (0x%llx)\n"),
-                                         (unsigned long long)block,
-                                         (unsigned long long)block);
-                               first = 0;
-                               continue;
-                       }
-                       bh = bread(sdp, block);
-
-                       /*log_debug( _("Checking metadata block #%" PRIu64
-                         " (0x%" PRIx64 ")\n"), block, block);*/
-
-                       if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
-                               /* In gfs2, a bitmap mark of 2 means an inode,
-                                  but in gfs1 it means any metadata.  So if
-                                  this is gfs1 and not an inode, it may be
-                                  okay.  If it's non-dinode metadata, it will
-                                  be referenced by an inode, so we need to
-                                  skip it here and it will be sorted out
-                                  when the referencing inode is checked. */
-                               if (sdp->gfs1) {
-                                       uint32_t check_magic;
-
-                                       check_magic = ((struct
-                                                       gfs2_meta_header *)
-                                                      (bh->b_data))->mh_magic;
-                                       if (be32_to_cpu(check_magic) ==
-                                           GFS2_MAGIC) {
-                                               log_debug( _("Deferring GFS1 "
-                                                            "metadata block #"
-                                                            "%" PRIu64" (0x%"
-                                                            PRIx64 ")\n"),
-                                                          block, block);
-                                               brelse(bh);
-                                               first = 0;
-                                               continue;
-                                       }
-                               }
-                               log_err( _("Found invalid inode at block #"
-                                          "%llu (0x%llx)\n"),
-                                        (unsigned long long)block,
-                                        (unsigned long long)block);
-                               if (gfs2_blockmap_set(bl, block,
-                                                     gfs2_block_free)) {
-                                       stack;
-                                       brelse(bh);
-                                       gfs2_special_free(&gfs1_rindex_blks);
-                                       return FSCK_ERROR;
-                               }
-                               check_n_fix_bitmap(sdp, block,
-                                                  gfs2_block_free);
-                       } else if (handle_di(sdp, bh) < 0) {
-                               stack;
-                               brelse(bh);
-                               gfs2_special_free(&gfs1_rindex_blks);
-                               return FSCK_ERROR;
-                       }
-                       /* Ignore everything else - they should be hit by the
-                          handle_di step.  Don't check NONE either, because
-                          check_meta passes everything if GFS2_METATYPE_NONE
-                          is specified.  Hopefully, other metadata types such
-                          as indirect blocks will be handled when the inode
-                          itself is processed, and if it's not, it should be
-                          caught in pass5. */
-                       brelse(bh);
-                       first = 0;
-               }
-               /*
-                 For GFS1, we have to count the "free meta" blocks in the
-                 resource group and mark them specially so we can count them
-                 properly in pass5.
-                */
-               if (!sdp->gfs1)
-                       continue;
-               first = 1;
-               while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
-                       gfs2_blockmap_set(bl, block, gfs2_freemeta);
-                       first = 0;
-               }
        }
        gfs2_special_free(&gfs1_rindex_blks);
        return FSCK_OK;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 46d4d67..db31a6c 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
 extern int do_init_inum(struct gfs2_sbd *sdp);
 extern int do_init_statfs(struct gfs2_sbd *sdp);
 extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
+extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
+                             uint64_t *buf, uint8_t state);
 extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
                             int first);
 extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
                                 uint64_t *block, uint32_t type, int first);
-extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
-                                int first);
 
 /* super.c */
 extern int check_sb(struct gfs2_sb *sb);
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 645c45a..6981072 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
        return 0;
 }
 
+/**
+ * lgfs2_bm_scan - list the blocks of one bitmap that are in a given state
+ * @rgd: the resource group
+ * @idx: index of the bitmap within @rgd
+ * @buf: output array for the matching block addresses; no bound is checked
+ *       here, so the caller must supply room for every possible match
+ * @state: the block state to search for
+ *
+ * Returns: the number of addresses stored in @buf
+ */
+unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf,
+                       uint8_t state)
+{
+       struct gfs2_bitmap *bmap = &rgd->bits[idx];
+       unsigned count = 0;
+       uint32_t pos;
+
+       for (pos = 0; pos < (bmap->bi_len * GFS2_NBBY); pos++) {
+               pos = gfs2_bitfit(rgd->bh[idx]->b_data + bmap->bi_offset,
+                                 bmap->bi_len, pos, state);
+               if (pos == BFITNOENT)
+                       break;
+               /* Offset the position within this bitmap by the bitmap's
+                  start and the rgrp's first data block. */
+               buf[count++] = pos + (bmap->bi_start * GFS2_NBBY) +
+                              rgd->ri.ri_data0;
+       }
+
+       return count;
+}
+
 /**
  * gfs2_next_rg_meta
  * @rgd:
@@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t 
*block, int first)
        return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
 }
 
-int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
-{
-       return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
-}
-
 /**
  * next_rg_metatype
  * @rgd:


Reply via email to