Abhi, On 6 September 2018 at 19:02, Abhi Das <a...@redhat.com> wrote: > Use bio(s) to read in the journal sequentially in large chunks and > locate the head of the journal. > This is faster in most cases when compared to the existing bisect > method which operates one block at a time. > > Signed-off-by: Abhi Das <a...@redhat.com> > --- > fs/gfs2/incore.h | 8 +++- > fs/gfs2/lops.c | 122 > +++++++++++++++++++++++++++++++++++++++++++++------ > fs/gfs2/lops.h | 1 + > fs/gfs2/ops_fstype.c | 1 + > fs/gfs2/recovery.c | 115 +++++------------------------------------------- > 5 files changed, 129 insertions(+), 118 deletions(-) > > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index b96d39c..b24c105 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -529,6 +529,11 @@ struct gfs2_journal_extent { > u64 blocks; > }; > > +enum { > + JDF_RECOVERY = 1, > + JDF_JHEAD = 2, > +}; > + > struct gfs2_jdesc { > struct list_head jd_list; > struct list_head extent_list; > @@ -536,12 +541,13 @@ struct gfs2_jdesc { > struct work_struct jd_work; > struct inode *jd_inode; > unsigned long jd_flags; > -#define JDF_RECOVERY 1 > unsigned int jd_jid; > unsigned int jd_blocks; > int jd_recover_error; > /* Replay stuff */ > > + struct gfs2_log_header_host jd_jhead; > + struct bio *jd_rd_bio; /* bio used for reading this journal */ > unsigned int jd_found_blocks; > unsigned int jd_found_revokes; > unsigned int jd_replayed_blocks; > diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c > index 4cc19af..21979b2 100644 > --- a/fs/gfs2/lops.c > +++ b/fs/gfs2/lops.c > @@ -18,6 +18,7 @@ > #include <linux/fs.h> > #include <linux/list_sort.h> > > +#include "bmap.h" > #include "dir.h" > #include "gfs2.h" > #include "incore.h" > @@ -227,6 +228,50 @@ static void gfs2_end_log_write(struct bio *bio) > wake_up(&sdp->sd_log_flush_wait); > } > > +static void gfs2_end_log_read(struct bio *bio) > +{ > + struct gfs2_jdesc *jd = bio->bi_private; > + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > + struct page 
*page; > + struct bio_vec *bvec; > + int i, last; > + > + if (bio->bi_status) { > + fs_err(sdp, "Error %d reading from journal, jid=%u\n", > + bio->bi_status, jd->jd_jid); > + } > + > + bio_for_each_segment_all(bvec, bio, i) { > + struct gfs2_log_header_host uninitialized_var(lh); > + void *ptr; > + > + page = bvec->bv_page; > + ptr = page_address(page); > + last = page_private(page); > + > + if (!test_bit(JDF_JHEAD, &jd->jd_flags)) { > + mempool_free(page, gfs2_page_pool); > + continue; > + } > + > + if (!__get_log_header(sdp, ptr, 0, &lh)) { > + if (lh.lh_sequence > jd->jd_jhead.lh_sequence) > + jd->jd_jhead = lh; > + else > + goto found; > + } > + > + if (last) { > + found: > + clear_bit(JDF_JHEAD, &jd->jd_flags); > + wake_up_bit(&jd->jd_flags, JDF_JHEAD); > + } > + mempool_free(page, gfs2_page_pool); > + } > + > + bio_put(bio); > +} > + > /** > * gfs2_log_flush_bio - Submit any pending log bio > * @biop: Address of the bio pointer > @@ -241,8 +286,10 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int > op_flags) > { > struct bio *bio = *biop; > if (bio) { > - struct gfs2_sbd *sdp = bio->bi_private; > - atomic_inc(&sdp->sd_log_in_flight); > + if (op != REQ_OP_READ) { > + struct gfs2_sbd *sdp = bio->bi_private; > + atomic_inc(&sdp->sd_log_in_flight); > + } > bio_set_op_attrs(bio, op, op_flags); > submit_bio(bio); > *biop = NULL; > @@ -253,6 +300,7 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int > op_flags) > * gfs2_log_alloc_bio - Allocate a new bio for log writing > * @jd: The journal descriptor > * @blkno: The next device block number we want to write to > + * @op: REQ_OP > * > * This should never be called when there is a cached bio in the > * super block. 
When it returns, there will be a cached bio in the > @@ -262,21 +310,24 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int > op_flags) > * Returns: Newly allocated bio > */ > > -static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno) > +static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno, int > op) > { > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > struct super_block *sb = sdp->sd_vfs; > struct bio *bio; > > - BUG_ON(sdp->sd_log_bio); > + BUG_ON((op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio)); > > bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); > bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); > bio_set_dev(bio, sb->s_bdev); > - bio->bi_end_io = gfs2_end_log_write; > - bio->bi_private = sdp; > + bio->bi_end_io = op == REQ_OP_READ ? gfs2_end_log_read : > gfs2_end_log_write; > + bio->bi_private = op == REQ_OP_READ ? (void*)jd : (void*)sdp; > > - sdp->sd_log_bio = bio; > + if (op == REQ_OP_READ) > + jd->jd_rd_bio = bio; > + else > + sdp->sd_log_bio = bio; > > return bio; > } > @@ -285,6 +336,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc > *jd, u64 blkno) > * gfs2_log_get_bio - Get cached log bio, or allocate a new one > * @jd: The journal descriptor > * @blkno: The device block number we want to write to > + * @op: REQ_OP > * > * If there is a cached bio, then if the next block number is sequential > * with the previous one, return it, otherwise flush the bio to the > @@ -294,10 +346,10 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc > *jd, u64 blkno) > * Returns: The bio to use for log writes > */ > > -static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno) > +static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int op) > { > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > - struct bio *bio = sdp->sd_log_bio; > + struct bio *bio = op == REQ_OP_READ ? 
jd->jd_rd_bio : sdp->sd_log_bio; > u64 nblk; > > if (bio) { > @@ -305,10 +357,12 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc > *jd, u64 blkno) > nblk >>= sdp->sd_fsb2bb_shift; > if (blkno == nblk) > return bio; > - gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0); > + gfs2_log_flush_bio(op == REQ_OP_READ ? &jd->jd_rd_bio > + : &sdp->sd_log_bio, REQ_OP_WRITE, 0);
Shouldn't this pass "op" instead of the hard-coded "REQ_OP_WRITE"? As written, a pending read bio (jd->jd_rd_bio) flushed from this path would be submitted as a write, and gfs2_log_flush_bio() would then treat its bi_private (the jd) as an sdp when incrementing sd_log_in_flight. > } > > - return gfs2_log_alloc_bio(sdp->sd_jdesc, blkno); > + return gfs2_log_alloc_bio(op == REQ_OP_READ ? jd : sdp->sd_jdesc, > + blkno, op); > } > > /** > @@ -330,11 +384,11 @@ void gfs2_log_write(struct gfs2_sbd *sdp, struct page > *page, > struct bio *bio; > int ret; > > - bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno); > + bio = gfs2_log_get_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE); > ret = bio_add_page(bio, page, size, offset); > if (ret == 0) { > gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0); > - bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno); > + bio = gfs2_log_alloc_bio(sdp->sd_jdesc, blkno, REQ_OP_WRITE); > ret = bio_add_page(bio, page, size, offset); > WARN_ON(ret == 0); > } > @@ -374,6 +428,48 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct > page *page) > gfs2_log_bmap(sdp)); > } > > +static void gfs2_log_read_extent(struct gfs2_jdesc *jd, u64 dblock, > + unsigned int blocks, int last) > +{ > + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > + struct super_block *sb = sdp->sd_vfs; > + struct page *page; > + int i, ret; > + struct bio *bio; > + > + for (i=0; i<blocks; i++) { > + page = mempool_alloc(gfs2_page_pool, GFP_NOIO); > + /* flag the last page of the journal we plan to read in */ > + page_private(page) = (last && i == (blocks - 1)); > + > + bio = gfs2_log_get_bio(jd, dblock + i, REQ_OP_READ); > + ret = bio_add_page(bio, page, sb->s_blocksize, 0); > + if (ret == 0) { > + gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0); > + bio = gfs2_log_alloc_bio(jd, dblock + i, REQ_OP_READ); > + ret = bio_add_page(bio, page, sb->s_blocksize, 0); > + WARN_ON(ret == 0); > + } > + bio->bi_private = jd; > + } > +} > + > +void gfs2_log_read(struct gfs2_jdesc *jd) > +{ > + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > + int last = 0; > + struct gfs2_journal_extent *je; > + > + if (list_empty(&jd->extent_list)) > + gfs2_map_journal_extents(sdp, jd); > + > + list_for_each_entry(je, 
&jd->extent_list, list) { > + last = list_is_last(&je->list, &jd->extent_list); > + gfs2_log_read_extent(jd, je->dblock, je->blocks, last); > + gfs2_log_flush_bio(&jd->jd_rd_bio, REQ_OP_READ, 0); > + } > +} > + > static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, > u32 ld_length, u32 ld_data1) > { > diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h > index d709d99..23392c5d 100644 > --- a/fs/gfs2/lops.h > +++ b/fs/gfs2/lops.h > @@ -32,6 +32,7 @@ extern void gfs2_log_write(struct gfs2_sbd *sdp, struct > page *page, > extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); > extern void gfs2_log_flush_bio(struct bio **biop, int op, int op_flags); > extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); > +extern void gfs2_log_read(struct gfs2_jdesc *jd); > > static inline unsigned int buf_limit(struct gfs2_sbd *sdp) > { > diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c > index c2469833b..dcc488b4 100644 > --- a/fs/gfs2/ops_fstype.c > +++ b/fs/gfs2/ops_fstype.c > @@ -578,6 +578,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct > gfs2_holder *ji_gh) > kfree(jd); > break; > } > + jd->jd_rd_bio = NULL; > > spin_lock(&sdp->sd_jindex_spin); > jd->jd_jid = sdp->sd_journals++; > diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c > index 1b95294..e90abe6 100644 > --- a/fs/gfs2/recovery.c > +++ b/fs/gfs2/recovery.c > @@ -182,85 +182,11 @@ static int get_log_header(struct gfs2_jdesc *jd, > unsigned int blk, > } > > /** > - * find_good_lh - find a good log header > - * @jd: the journal > - * @blk: the segment to start searching from > - * @lh: the log header to fill in > - * @forward: if true search forward in the log, else search backward > - * > - * Call get_log_header() to get a log header for a segment, but if the > - * segment is bad, either scan forward or backward until we find a good one. 
> - * > - * Returns: errno > - */ > - > -static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, > - struct gfs2_log_header_host *head) > -{ > - unsigned int orig_blk = *blk; > - int error; > - > - for (;;) { > - error = get_log_header(jd, *blk, head); > - if (error <= 0) > - return error; > - > - if (++*blk == jd->jd_blocks) > - *blk = 0; > - > - if (*blk == orig_blk) { > - gfs2_consist_inode(GFS2_I(jd->jd_inode)); > - return -EIO; > - } > - } > -} > - > -/** > - * jhead_scan - make sure we've found the head of the log > - * @jd: the journal > - * @head: this is filled in with the log descriptor of the head > - * > - * At this point, seg and lh should be either the head of the log or just > - * before. Scan forward until we find the head. > - * > - * Returns: errno > - */ > - > -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host > *head) > -{ > - unsigned int blk = head->lh_blkno; > - struct gfs2_log_header_host lh; > - int error; > - > - for (;;) { > - if (++blk == jd->jd_blocks) > - blk = 0; > - > - error = get_log_header(jd, blk, &lh); > - if (error < 0) > - return error; > - if (error == 1) > - continue; > - > - if (lh.lh_sequence == head->lh_sequence) { > - gfs2_consist_inode(GFS2_I(jd->jd_inode)); > - return -EIO; > - } > - if (lh.lh_sequence < head->lh_sequence) > - break; > - > - *head = lh; > - } > - > - return 0; > -} > - > -/** > * gfs2_find_jhead - find the head of a log > * @jd: the journal > * @head: the log descriptor for the head of the log is returned here > * > - * Do a binary search of a journal and find the valid log entry with the > + * Do a search of a journal and find the valid log entry with the > * highest sequence number. (i.e. 
the log head) > * > * Returns: errno > @@ -268,38 +194,19 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct > gfs2_log_header_host *head) > > int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) > { > - struct gfs2_log_header_host lh_1, lh_m; > - u32 blk_1, blk_2, blk_m; > - int error; > - > - blk_1 = 0; > - blk_2 = jd->jd_blocks - 1; > - > - for (;;) { > - blk_m = (blk_1 + blk_2) / 2; > - > - error = find_good_lh(jd, &blk_1, &lh_1); > - if (error) > - return error; > - > - error = find_good_lh(jd, &blk_m, &lh_m); > - if (error) > - return error; > - > - if (blk_1 == blk_m || blk_m == blk_2) > - break; > + int error = 0; > > - if (lh_1.lh_sequence <= lh_m.lh_sequence) > - blk_1 = blk_m; > - else > - blk_2 = blk_m; > - } > + memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host)); > + set_bit(JDF_JHEAD, &jd->jd_flags); > + gfs2_log_read(jd); > > - error = jhead_scan(jd, &lh_1); > - if (error) > - return error; > + if (test_bit(JDF_JHEAD, &jd->jd_flags)) > + wait_on_bit(&jd->jd_flags, JDF_JHEAD, TASK_INTERRUPTIBLE); > > - *head = lh_1; > + if (jd->jd_jhead.lh_sequence == 0) > + error = 1; > + else > + *head = jd->jd_jhead; > > return error; > } > -- > 2.4.11 > Thanks, Andreas