The following patch cleans up the recovery code and fixes a few bugs along the way. The bugs are: o An incorrect assumption about the size of the journal. o An issue where the superblock was being used to store variables local to the recovery process, which would cause a problem if multiple journals were recovered at once. o Incorrect counts of blocks read & recovered could be reported in some cases (this is harmless; it's just a logging issue).
Features: o Moves the recovery code from lops.c into recovery.c which allows making a number of functions static and removing other bits of code. o Removes the "before scan" functions as they are not needed (partly merged into the "scan" functions) o Removes the "after scan" functions. These have also been merged into the "scan" functions o We no longer call any functions which may in turn call withdraw from the recovery code. If there is an issue with recovery, we report it to the caller (and userspace). o New uevent env variable is documented o Superblock shrinks by 32 bytes on 64 bit arches. o Code shrinks by about 100 lines (probably more since there are more comments now) TODO: o Report where error has occurred in log, as well as what the error is o Check code for finding journal headers (maybe remove gfs2_log_header_host?) o Testing :-) For the moment, this is just a heads up on what I'm working on. I hope it won't be too long before I have a final version of this patch, Steve. diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt index fd966dc..c029596 100644 --- a/Documentation/filesystems/gfs2-uevents.txt +++ b/Documentation/filesystems/gfs2-uevents.txt @@ -44,6 +44,10 @@ for every journal recovered, whether it is during the initial mount process or as the result of gfs_controld requesting a specific journal recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file. +If the recovery has failed, then on recent versions of GFS2 the +ERROR= variable will also be included. This returns a kernel +error code indicating what went wrong during recovery. 
+ Because the CHANGE uevent was used (in early versions of gfs_controld) without checking the environment variables to discover the state, we cannot add any more functions to it without running the risk of diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 4792200..e497aaf 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -50,12 +50,6 @@ struct gfs2_log_operations { void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); void (*lo_before_commit) (struct gfs2_sbd *sdp); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); - void (*lo_before_scan) (struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, int pass); - int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, - struct gfs2_log_descriptor *ld, __be64 *ptr, - int pass); - void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass); const char *lo_name; }; @@ -648,15 +642,6 @@ struct gfs2_sbd { struct list_head sd_ail2_list; u64 sd_ail_sync_gen; - /* Replay stuff */ - - struct list_head sd_revoke_list; - unsigned int sd_replay_tail; - - unsigned int sd_found_blocks; - unsigned int sd_found_revokes; - unsigned int sd_replayed_blocks; - /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index de97632..4d301af 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -136,6 +136,12 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_trans *tr; lock_buffer(bd->bd_bh); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { + printk(KERN_ERR "GFS2: %s mh error: buf_lo_add block %llu\n", + sdp->sd_fsname, (unsigned long long)bd->bd_bh->b_blocknr); + BUG(); + } gfs2_log_lock(sdp); if (!list_empty(&bd->bd_list_tr)) goto out; @@ -147,9 +153,7 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) goto out; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 
- gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); - mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; @@ -235,84 +239,6 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); } -static void buf_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, int pass) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - - if (pass != 0) - return; - - sdp->sd_found_blocks = 0; - sdp->sd_replayed_blocks = 0; -} - -static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, - struct gfs2_log_descriptor *ld, __be64 *ptr, - int pass) -{ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_glock *gl = ip->i_gl; - unsigned int blks = be32_to_cpu(ld->ld_data1); - struct buffer_head *bh_log, *bh_ip; - u64 blkno; - int error = 0; - - if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) - return 0; - - gfs2_replay_incr_blk(sdp, &start); - - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { - blkno = be64_to_cpu(*ptr++); - - sdp->sd_found_blocks++; - - if (gfs2_revoke_check(sdp, blkno, start)) - continue; - - error = gfs2_replay_read_block(jd, start, &bh_log); - if (error) - return error; - - bh_ip = gfs2_meta_new(gl, blkno); - memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); - - if (gfs2_meta_check(sdp, bh_ip)) - error = -EIO; - else - mark_buffer_dirty(bh_ip); - - brelse(bh_log); - brelse(bh_ip); - - if (error) - break; - - sdp->sd_replayed_blocks++; - } - - return error; -} - -static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) -{ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - - if (error) { - gfs2_meta_sync(ip->i_gl); - return; - } - if (pass != 1) - return; - - gfs2_meta_sync(ip->i_gl); - - fs_info(sdp, "jid=%u: 
Replayed %u of %u blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); -} static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { @@ -370,85 +296,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) submit_bh(WRITE_SYNC_PLUG, bh); } -static void revoke_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, int pass) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - - if (pass != 0) - return; - - sdp->sd_found_revokes = 0; - sdp->sd_replay_tail = head->lh_tail; -} - -static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, - struct gfs2_log_descriptor *ld, __be64 *ptr, - int pass) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - unsigned int blks = be32_to_cpu(ld->ld_length); - unsigned int revokes = be32_to_cpu(ld->ld_data1); - struct buffer_head *bh; - unsigned int offset; - u64 blkno; - int first = 1; - int error; - - if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE) - return 0; - - offset = sizeof(struct gfs2_log_descriptor); - - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { - error = gfs2_replay_read_block(jd, start, &bh); - if (error) - return error; - - if (!first) - gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB); - - while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { - blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); - - error = gfs2_revoke_add(sdp, blkno, start); - if (error < 0) { - brelse(bh); - return error; - } - else if (error) - sdp->sd_found_revokes++; - - if (!--revokes) - break; - offset += sizeof(u64); - } - - brelse(bh); - offset = sizeof(struct gfs2_meta_header); - first = 0; - } - - return 0; -} - -static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - - if (error) { - gfs2_revoke_clean(sdp); - return; - } - if (pass != 1) - return; - - fs_info(sdp, "jid=%u: Found %u revoke tags\n", - jd->jd_jid, sdp->sd_found_revokes); - - 
gfs2_revoke_clean(sdp); -} - static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_rgrpd *rgd; @@ -643,78 +490,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); } -static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, - struct gfs2_log_descriptor *ld, - __be64 *ptr, int pass) -{ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_glock *gl = ip->i_gl; - unsigned int blks = be32_to_cpu(ld->ld_data1); - struct buffer_head *bh_log, *bh_ip; - u64 blkno; - u64 esc; - int error = 0; - - if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) - return 0; - - gfs2_replay_incr_blk(sdp, &start); - for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { - blkno = be64_to_cpu(*ptr++); - esc = be64_to_cpu(*ptr++); - - sdp->sd_found_blocks++; - - if (gfs2_revoke_check(sdp, blkno, start)) - continue; - - error = gfs2_replay_read_block(jd, start, &bh_log); - if (error) - return error; - - bh_ip = gfs2_meta_new(gl, blkno); - memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); - - /* Unescape */ - if (esc) { - __be32 *eptr = (__be32 *)bh_ip->b_data; - *eptr = cpu_to_be32(GFS2_MAGIC); - } - mark_buffer_dirty(bh_ip); - - brelse(bh_log); - brelse(bh_ip); - if (error) - break; - - sdp->sd_replayed_blocks++; - } - - return error; -} - -/* FIXME: sort out accounting for log blocks etc. */ - -static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) -{ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - - if (error) { - gfs2_meta_sync(ip->i_gl); - return; - } - if (pass != 1) - return; - - /* data sync? 
*/ - gfs2_meta_sync(ip->i_gl); - - fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); -} - static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) { struct list_head *head = &sdp->sd_log_le_databuf; @@ -734,18 +509,12 @@ const struct gfs2_log_operations gfs2_buf_lops = { .lo_add = buf_lo_add, .lo_before_commit = buf_lo_before_commit, .lo_after_commit = buf_lo_after_commit, - .lo_before_scan = buf_lo_before_scan, - .lo_scan_elements = buf_lo_scan_elements, - .lo_after_scan = buf_lo_after_scan, .lo_name = "buf", }; const struct gfs2_log_operations gfs2_revoke_lops = { .lo_add = revoke_lo_add, .lo_before_commit = revoke_lo_before_commit, - .lo_before_scan = revoke_lo_before_scan, - .lo_scan_elements = revoke_lo_scan_elements, - .lo_after_scan = revoke_lo_after_scan, .lo_name = "revoke", }; @@ -759,8 +528,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { .lo_add = databuf_lo_add, .lo_before_commit = databuf_lo_before_commit, .lo_after_commit = databuf_lo_after_commit, - .lo_scan_elements = databuf_lo_scan_elements, - .lo_after_scan = databuf_lo_after_scan, .lo_name = "databuf", }; diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 3c0b273..c2f8dc0 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -73,41 +73,5 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_log_ops[x]->lo_after_commit(sdp, ai); } -static inline void lops_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, - unsigned int pass) -{ - int x; - for (x = 0; gfs2_log_ops[x]; x++) - if (gfs2_log_ops[x]->lo_before_scan) - gfs2_log_ops[x]->lo_before_scan(jd, head, pass); -} - -static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start, - struct gfs2_log_descriptor *ld, - __be64 *ptr, - unsigned int pass) -{ - int x, error; - for (x = 0; gfs2_log_ops[x]; x++) - if (gfs2_log_ops[x]->lo_scan_elements) { - error = 
gfs2_log_ops[x]->lo_scan_elements(jd, start, - ld, ptr, pass); - if (error) - return error; - } - - return 0; -} - -static inline void lops_after_scan(struct gfs2_jdesc *jd, int error, - unsigned int pass) -{ - int x; - for (x = 0; gfs2_log_ops[x]; x++) - if (gfs2_log_ops[x]->lo_before_scan) - gfs2_log_ops[x]->lo_after_scan(jd, error, pass); -} - #endif /* __LOPS_DOT_H__ */ diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index cb8d7a9..e89f14d 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -136,24 +136,6 @@ void gfs2_aspace_put(struct inode *aspace) } /** - * gfs2_meta_sync - Sync all buffers associated with a glock - * @gl: The glock - * - */ - -void gfs2_meta_sync(struct gfs2_glock *gl) -{ - struct address_space *mapping = gl->gl_aspace->i_mapping; - int error; - - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - - if (error) - gfs2_io_error(gl->gl_sbd); -} - -/** * gfs2_getbuf - Get a buffer with a given address space * @gl: the glock * @blkno: the block number (filesystem scope) diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index de270c2..38cca55 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -40,8 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); void gfs2_aspace_put(struct inode *aspace); -void gfs2_meta_sync(struct gfs2_glock *gl); - struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index edfee24..bf7361a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -115,8 +115,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); - INIT_LIST_HEAD(&sdp->sd_revoke_list); - mutex_init(&sdp->sd_freeze_lock); return sdp; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c 
index e594d9e..64b4892 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -27,8 +27,15 @@ #include "util.h" #include "dir.h" -int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, - struct buffer_head **bh) +struct gfs2_recovery_ops { + u32 type; + int (*fxn)(struct gfs2_jdesc *jd, struct list_head *revoke_list, + const struct gfs2_log_descriptor *ld, + u32 tail, u32 start, const __be64 *ptr); +}; + +static int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, + struct buffer_head **bh) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_glock *gl = ip->i_gl; @@ -40,19 +47,16 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen); if (error) return error; - if (!dblock) { - gfs2_consist_inode(ip); - return -EIO; - } + if (!dblock) + return -ESRCH; *bh = gfs2_meta_ra(gl, dblock, extlen); return error; } -int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +static int gfs2_revoke_add(struct list_head *head, u64 blkno, unsigned int where) { - struct list_head *head = &sdp->sd_revoke_list; struct gfs2_revoke_replay *rr; int found = 0; @@ -79,13 +83,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) return 1; } -int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +static int gfs2_revoke_check(struct list_head *head, u64 blkno, u32 tail, unsigned int where) { struct gfs2_revoke_replay *rr; int wrap, a, b, revoke; int found = 0; - list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { + list_for_each_entry(rr, head, rr_list) { if (rr->rr_blkno == blkno) { found = 1; break; @@ -95,17 +99,16 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) if (!found) return 0; - wrap = (rr->rr_where < sdp->sd_replay_tail); - a = (sdp->sd_replay_tail < where); + wrap = (rr->rr_where < tail); + a = (tail < where); b = (where < rr->rr_where); revoke = (wrap) ? 
(a || b) : (a && b); return revoke; } -void gfs2_revoke_clean(struct gfs2_sbd *sdp) +static void gfs2_revoke_clean(struct list_head *head) { - struct list_head *head = &sdp->sd_revoke_list; struct gfs2_revoke_replay *rr; while (!list_empty(head)) { @@ -115,19 +118,38 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp) } } -static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) +static void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) { const struct gfs2_log_header *str = buf; - if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) || - str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH)) - return 1; - lh->lh_sequence = be64_to_cpu(str->lh_sequence); lh->lh_flags = be32_to_cpu(str->lh_flags); lh->lh_tail = be32_to_cpu(str->lh_tail); lh->lh_blkno = be32_to_cpu(str->lh_blkno); lh->lh_hash = be32_to_cpu(str->lh_hash); +} + +static int gfs2_check_log_header(const void *ptr, u32 blkno) +{ + const struct gfs2_log_header *lh = ptr; + const struct gfs2_meta_header *mh = &lh->lh_header; + const u32 nothing = 0; + u32 hash; + + if (mh->mh_magic != cpu_to_be32(GFS2_MAGIC) || + mh->mh_type != cpu_to_be32(GFS2_METATYPE_LH)) + return 1; + + hash = crc32_le((u32)~0, (unsigned char const *)lh, sizeof(struct gfs2_log_header) - + sizeof(u32)); + hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing)); + hash ^= (u32)~0; + + if (be32_to_cpu(lh->lh_hash) != hash) + return -EINVAL; + if (be32_to_cpu(lh->lh_blkno) != blkno) + return -EINVAL; + return 0; } @@ -150,22 +172,17 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, { struct buffer_head *bh; struct gfs2_log_header_host uninitialized_var(lh); - const u32 nothing = 0; - u32 hash; int error; error = gfs2_replay_read_block(jd, blk, &bh); if (error) return error; - hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) - - sizeof(u32)); - hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing)); - hash ^= (u32)~0; - error = 
gfs2_log_header_in(&lh, bh->b_data); + error = gfs2_check_log_header(bh->b_data, blk); + gfs2_log_header_in(&lh, bh->b_data); brelse(bh); - if (error || lh.lh_blkno != blk || lh.lh_hash != hash) + if (error) return 1; *head = lh; @@ -200,10 +217,8 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, if (++*blk == jd->jd_blocks) *blk = 0; - if (*blk == orig_blk) { - gfs2_consist_inode(GFS2_I(jd->jd_inode)); - return -EIO; - } + if (*blk == orig_blk) + return -EINVAL; } } @@ -234,10 +249,8 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) if (error == 1) continue; - if (lh.lh_sequence == head->lh_sequence) { - gfs2_consist_inode(GFS2_I(jd->jd_inode)); - return -EIO; - } + if (lh.lh_sequence == head->lh_sequence) + return -EINVAL; if (lh.lh_sequence < head->lh_sequence) break; @@ -296,6 +309,199 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) return error; } +static int gfs2_recover_metadata(struct gfs2_jdesc *jd, + struct list_head *revoke_list, + const struct gfs2_log_descriptor *ld, + u32 tail, u32 start, const __be64 *ptr) +{ + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); + struct gfs2_glock *gl = ip->i_gl; + unsigned int blks = be32_to_cpu(ld->ld_data1); + struct buffer_head *bh_log, *bh_ip; + unsigned int found_blocks = 0; + unsigned int replayed_blocks = 0; + const struct gfs2_meta_header *mh; + u64 blkno; + int error = 0; + + start++; + start %= jd->jd_blocks; + + for (; blks; start++, blks--) { + start %= jd->jd_blocks; + blkno = be64_to_cpu(*ptr++); + + found_blocks++; + + if (gfs2_revoke_check(revoke_list, blkno, tail, start)) + continue; + + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + mh = (const struct gfs2_meta_header *)bh_ip->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) + 
error = -EINVAL; + else + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + + if (error) + break; + + replayed_blocks++; + } + if (error) + return error; + fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", + jd->jd_jid, replayed_blocks, found_blocks); + return 0; +} + +static int gfs2_recover_jdata(struct gfs2_jdesc *jd, + struct list_head *revoke_list, + const struct gfs2_log_descriptor *ld, + u32 tail, u32 start, const __be64 *ptr) +{ + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); + struct gfs2_glock *gl = ip->i_gl; + unsigned int blks = be32_to_cpu(ld->ld_data1); + struct buffer_head *bh_log, *bh_ip; + unsigned int found_blocks = 0; + unsigned int replayed_blocks = 0; + u64 blkno; + u64 esc; + int error = 0; + + start++; + for (; blks; start++, blks--) { + start %= jd->jd_blocks; + blkno = be64_to_cpu(*ptr++); + esc = be64_to_cpu(*ptr++); + + found_blocks++; + + if (gfs2_revoke_check(revoke_list, blkno, tail, start)) + continue; + + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + /* Unescape */ + if (esc) { + __be32 *eptr = (__be32 *)bh_ip->b_data; + *eptr = cpu_to_be32(GFS2_MAGIC); + } + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + if (error) + break; + + replayed_blocks++; + } + if (error) + return error; + fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", + jd->jd_jid, replayed_blocks, found_blocks); + return 0; +} + +static int gfs2_recover_revoke(struct gfs2_jdesc *jd, + struct list_head *revoke_list, + const struct gfs2_log_descriptor *ld, + u32 tail, u32 start, const __be64 *ptr) +{ + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); + unsigned int blks = be32_to_cpu(ld->ld_length); + unsigned int revokes = be32_to_cpu(ld->ld_data1); + const struct gfs2_meta_header *mh; + struct buffer_head *bh; + unsigned int offset; + u64 blkno; + int 
first = 1; + int error; + unsigned int found_revokes = 0; + + offset = sizeof(struct gfs2_log_descriptor); + + for (; blks; start++, blks--) { + start %= jd->jd_blocks; + error = gfs2_replay_read_block(jd, start, &bh); + if (error) + return error; + + if (!first) { + mh = (const struct gfs2_meta_header *)bh->b_data; + if ((mh->mh_magic != cpu_to_be32(GFS2_MAGIC)) || + (mh->mh_type != cpu_to_be32(GFS2_METATYPE_LB))) + return -EINVAL; + } + + while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { + blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); + + error = gfs2_revoke_add(revoke_list, blkno, start); + if (error < 0) { + brelse(bh); + return error; + } else if (error) + found_revokes++; + + if (!--revokes) + break; + offset += sizeof(u64); + } + + brelse(bh); + offset = sizeof(struct gfs2_meta_header); + first = 0; + } + + fs_info(sdp, "jid=%u: Found %u revoke tags\n", + jd->jd_jid, found_revokes); + return 0; +} + +static const struct gfs2_recovery_ops recovery_pass0[] = { + { .type = GFS2_LOG_DESC_REVOKE, .fxn = gfs2_recover_revoke, }, + { .type = GFS2_LOG_DESC_METADATA, }, + { .type = GFS2_LOG_DESC_JDATA, }, + { .type = 0, } /* End of list */ +}; + +static const struct gfs2_recovery_ops recovery_pass1[] = { + { .type = GFS2_LOG_DESC_REVOKE, }, + { .type = GFS2_LOG_DESC_METADATA, .fxn = gfs2_recover_metadata }, + { .type = GFS2_LOG_DESC_JDATA, .fxn = gfs2_recover_jdata }, + { .type = 0, } /* End of list */ +}; + +static int find_recovery_op(const struct gfs2_log_descriptor *ld, + const struct gfs2_recovery_ops *ops) +{ + int i; + + for (i = 0; ops[i].type; i++) { + if (ops[i].type == be32_to_cpu(ld->ld_type)) + return i; + } + + return -EINVAL; +} + /** * foreach_descriptor - go through the active part of the log * @jd: the journal @@ -308,16 +514,20 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) * Returns: errno */ -static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, - unsigned int end, int pass) +static 
int foreach_descriptor(struct gfs2_jdesc *jd, + struct list_head *revoke_list, unsigned int start, + unsigned int end, + const struct gfs2_recovery_ops *ops) { - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct buffer_head *bh; - struct gfs2_log_descriptor *ld; + const struct gfs2_log_descriptor *ld; + const struct gfs2_meta_header *mh; + unsigned int tail = start; int error = 0; u32 length; __be64 *ptr; unsigned int offset = sizeof(struct gfs2_log_descriptor); + offset += sizeof(__be64) - 1; offset &= ~(sizeof(__be64) - 1); @@ -325,42 +535,37 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, error = gfs2_replay_read_block(jd, start, &bh); if (error) return error; - if (gfs2_meta_check(sdp, bh)) { - brelse(bh); - return -EIO; - } - ld = (struct gfs2_log_descriptor *)bh->b_data; - length = be32_to_cpu(ld->ld_length); - - if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { - struct gfs2_log_header_host lh; - error = get_log_header(jd, start, &lh); - if (!error) { - gfs2_replay_incr_blk(sdp, &start); - brelse(bh); + mh = (const struct gfs2_meta_header *)bh->b_data; + switch (be32_to_cpu(mh->mh_type)) { + case GFS2_METATYPE_LD: + break; + case GFS2_METATYPE_LH: + error = gfs2_check_log_header(bh->b_data, start); + if (error == 0) { + start++; + start %= jd->jd_blocks; continue; } - if (error == 1) { - gfs2_consist_inode(GFS2_I(jd->jd_inode)); - error = -EIO; - } - brelse(bh); - return error; - } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) { - brelse(bh); - return -EIO; + default: /* Fall though */ + return -EINVAL; } + + ld = (struct gfs2_log_descriptor *)bh->b_data; + length = be32_to_cpu(ld->ld_length); ptr = (__be64 *)(bh->b_data + offset); - error = lops_scan_elements(jd, start, ld, ptr, pass); - if (error) { - brelse(bh); + error = find_recovery_op(ld, ops); + if (error < 0) return error; - } - - while (length--) - gfs2_replay_incr_blk(sdp, &start); - + if (ops[error].fxn) + error = ops[error].fxn(jd, 
revoke_list, ld, tail, start, ptr); + else + error = 0; brelse(bh); + if (error) + return error; + + start += length; + start %= jd->jd_blocks; } return 0; @@ -388,15 +593,14 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; lblock = head->lh_blkno; - gfs2_replay_incr_blk(sdp, &lblock); + lblock++; + lblock %= jd->jd_blocks; bh_map.b_size = 1 << ip->i_inode.i_blkbits; error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) return error; - if (!bh_map.b_blocknr) { - gfs2_consist_inode(ip); - return -EIO; - } + if (!bh_map.b_blocknr) + return -ESRCH; bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr); lock_buffer(bh); @@ -426,20 +630,44 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea return error; } +/** + * gfs2_recovery_done - Notify the results of recovery to userspace + * @sdp: The superblock + * @jid: The journal id + * @errno: The error associated with the journal recovery result + * + * This sends a uevent and also prints log messages to notify userspace + * about the result of a journal recovery attempt. If @errno is zero then + * it is considered successful. There are a number of possible reasons + * for failure, including: + * -EROFS - The block device is read-only + * -EINVAL - Some invalid data was read from the journal + * -EIO - An I/O error occurred while reading the journal or writing back + * changed information + * -ESRCH - Block map on the journal inode failed + */ -static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, - unsigned int message) +static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, int errno) { char env_jid[20]; char env_status[20]; - char *envp[] = { env_jid, env_status, NULL }; + char env_error[20]; + char *envp[] = { env_jid, env_status, NULL, NULL }; + const char *msg = errno ? 
"Failed" : "Done"; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + ls->ls_recover_jid_done = jid; - ls->ls_recover_jid_status = message; + ls->ls_recover_jid_status = errno ? LM_RD_GAVEUP : LM_RD_SUCCESS; sprintf(env_jid, "JID=%d", jid); - sprintf(env_status, "RECOVERY=%s", - message == LM_RD_SUCCESS ? "Done" : "Failed"); + sprintf(env_status, "RECOVERY=%s", msg); + if (errno) { + sprintf(env_error, "ERROR=%d\n", errno); + envp[2] = env_error; + } kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); + if (errno == -EROFS) + fs_warn(sdp, "jid=%u: Can't replay: read-only block device\n", jid); + fs_info(sdp, "jid=%u: Recovery %s (%d)\n", jid, msg, errno); } static int gfs2_recover_get_ref(struct slow_work *work) @@ -458,6 +686,22 @@ static void gfs2_recover_put_ref(struct slow_work *work) wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } +/** + * gfs2_recover_work - The main journal recovery function + * @work: The context for the recovery + * + * There are two reasons why we recover journals. Firstly at mount + * time we recover the journal which we are about to use and if we + * are the first node to mount the filesystem, we also recover all + * the other journals before other nodes are allowed to mount. Once + * we are mounted, if a node fails, then this function is scheduled + * to recover its journal. We never recover our own journal except at + * mount time. + * + * The results of recovery are logged and also sent to userspace + * via a uevent message. 
+ */ + static void gfs2_recover_work(struct slow_work *work) { struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); @@ -466,9 +710,8 @@ static void gfs2_recover_work(struct slow_work *work) struct gfs2_log_header_host head; struct gfs2_holder j_gh, ji_gh, t_gh; unsigned long t; - int ro = 0; - unsigned int pass; int error; + LIST_HEAD(revoke_list); if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", @@ -510,76 +753,63 @@ static void gfs2_recover_work(struct slow_work *work) if (error) goto fail_gunlock_ji; - if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", - jd->jd_jid); - - t = jiffies; - - /* Acquire a shared hold on the transaction lock */ - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_NOCACHE, &t_gh); - if (error) - goto fail_gunlock_ji; - - if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) - ro = 1; - } else { - if (sdp->sd_vfs->s_flags & MS_RDONLY) { - /* check if device itself is read-only */ - ro = bdev_read_only(sdp->sd_vfs->s_bdev); - if (!ro) { - fs_info(sdp, "recovery required on " - "read-only filesystem.\n"); - fs_info(sdp, "write access will be " - "enabled during recovery.\n"); - } - } - } + /* Clean unmount, skip recovery */ + if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) + goto fail_gunlock_ji; - if (ro) { - fs_warn(sdp, "jid=%u: Can't replay: read-only block " - "device\n", jd->jd_jid); - error = -EROFS; - goto fail_gunlock_tr; - } + fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", jd->jd_jid); - fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); + t = jiffies; + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | LM_FLAG_PRIORITY | + GL_NOCACHE, &t_gh); + if (error) + goto fail_gunlock_ji; - for (pass = 0; pass < 2; pass++) { - lops_before_scan(jd, &head, pass); 
- error = foreach_descriptor(jd, head.lh_tail, - head.lh_blkno, pass); - lops_after_scan(jd, error, pass); - if (error) - goto fail_gunlock_tr; - } + error = -EROFS; + if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + goto out_gunlock_tr; + } else if (sdp->sd_vfs->s_flags & MS_RDONLY) { + /* check if device itself is read-only */ + if (bdev_read_only(sdp->sd_vfs->s_bdev)) + goto out_gunlock_tr; + fs_info(sdp, "recovery required on read-only filesystem.\n"); + fs_info(sdp, "write access will be enabled during recovery.\n"); + } - error = clean_journal(jd, &head); - if (error) - goto fail_gunlock_tr; + fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); - gfs2_glock_dq_uninit(&t_gh); - t = DIV_ROUND_UP(jiffies - t, HZ); - fs_info(sdp, "jid=%u: Journal replayed in %lus\n", - jd->jd_jid, t); + /* Pass 0: Build revoke list, check block types & lengths */ + error = foreach_descriptor(jd, &revoke_list, head.lh_tail, + head.lh_blkno, recovery_pass0); + if (error == 0) { + /* Pass 1: Scan metadata & jdata */ + error = foreach_descriptor(jd, &revoke_list, head.lh_tail, + head.lh_blkno, recovery_pass1); } - if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) - gfs2_glock_dq_uninit(&ji_gh); + gfs2_revoke_clean(&revoke_list); + if (error) + goto out_gunlock_tr; - gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); + /* Write back any changed blocks */ + error = filemap_fdatawrite(ip->i_inode.i_mapping); + if (error) + goto out_gunlock_tr; + error = filemap_fdatawait(ip->i_inode.i_mapping); + if (error) + goto out_gunlock_tr; - if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) - gfs2_glock_dq_uninit(&j_gh); + /* Write a clean, unmount journal header */ + error = clean_journal(jd, &head); + if (error) + goto out_gunlock_tr; - fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); - return; + t = DIV_ROUND_UP(jiffies - t, HZ); + fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t); -fail_gunlock_tr: +out_gunlock_tr: 
gfs2_glock_dq_uninit(&t_gh); fail_gunlock_ji: if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { @@ -587,11 +817,8 @@ fail_gunlock_ji: fail_gunlock_j: gfs2_glock_dq_uninit(&j_gh); } - - fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); - fail: - gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); + gfs2_recovery_done(sdp, jd->jd_jid, error); } struct slow_work_ops gfs2_recover_ops = { diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 1616ac2..fbc6300 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -12,19 +12,6 @@ #include "incore.h" -static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) -{ - if (++*blk == sdp->sd_jdesc->jd_blocks) - *blk = 0; -} - -extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, - struct buffer_head **bh); - -extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); - extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 33e96b0..eca400d 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -77,23 +77,6 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *type, const char *function, char *file, unsigned int line); -static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, - struct buffer_head *bh, - const char *function, - char *file, unsigned int line) -{ - struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; - u32 magic = be32_to_cpu(mh->mh_magic); - if (unlikely(magic != GFS2_MAGIC)) - return gfs2_meta_check_ii(sdp, bh, "magic number", function, - file, line); - return 0; -} - -#define gfs2_meta_check(sdp, bh) \ -gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__) - - int gfs2_metatype_check_ii(struct 
gfs2_sbd *sdp, struct buffer_head *bh, u16 type, u16 t, const char *function,