Hi, Thanks for the comments, Steve. Here is another version that shows the same performance benefit (better, actually). It's much simpler than the previous one. Instead of keeping a bitmap, it simply uses a u32 in the gfs2_inode to keep track of where it last read ahead. That avoids a lot of the issues you wrote about.
I couldn't use the file struct because of the way function gfs2_dir_read is called from NFS. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson <rpete...@redhat.com> -- fs/gfs2/dir.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/incore.h | 1 + 2 files changed, 51 insertions(+), 0 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 2045d70..31888bd 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -76,6 +76,8 @@ #define IS_LEAF 1 /* Hashed (leaf) directory */ #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ +#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */ + #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) @@ -345,6 +347,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) if (hc) return hc; + ip->i_ra_index = 0; hsize = 1 << ip->i_depth; hsize *= sizeof(__be64); if (hsize != i_size_read(&ip->i_inode)) { @@ -382,6 +385,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc = ip->i_hash_cache; + ip->i_ra_index = 0; ip->i_hash_cache = NULL; kfree(hc); } @@ -1377,6 +1381,50 @@ out: } +/* gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. + * + * Note: we can't calculate each index like dir_e_read can because we don't + * have the leaf, and therefore we don't have the depth, and therefore we + * don't have the length. So we have to just read enough ahead to make up + * for the loss of information. */ +static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; + struct buffer_head *bh; + u64 blocknr = 0, last; + unsigned count; + + /* First check if we've already read-ahead for the whole range. 
*/ + if (index + MAX_RA_BLOCKS < ip->i_ra_index) + return; + + ip->i_ra_index = max(index, ip->i_ra_index); + for (count = 0; count < MAX_RA_BLOCKS; count++) { + if (ip->i_ra_index >= hsize) /* if exceeded the hash table */ + break; + + last = blocknr; + blocknr = be64_to_cpu(ip->i_hash_cache[ip->i_ra_index]); + ip->i_ra_index++; + if (blocknr == last) + continue; + + bh = gfs2_getbuf(gl, blocknr, 1); + if (trylock_buffer(bh)) { + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + brelse(bh); + continue; + } + bh->b_end_io = end_buffer_read_sync; + submit_bh(READA | REQ_META, bh); + continue; + } + brelse(bh); + } +} + /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @dip: dinode pointer @@ -1406,6 +1454,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, if (IS_ERR(lp)) return PTR_ERR(lp); + gfs2_dir_readahead(inode, hsize, index); + while (index < hsize) { error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, &copied, &depth, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 892ac37..50c3bcb 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -286,6 +286,7 @@ struct gfs2_inode { struct rw_semaphore i_rw_mutex; struct list_head i_trunc_list; __be64 *i_hash_cache; + u32 i_ra_index; /* read-ahead index */ u32 i_entries; u32 i_diskflags; u8 i_height;