Hi, On Fri, 2011-10-07 at 12:01 -0400, Bob Peterson wrote: > Hi, > > Thanks for the comments, Steve. Here is another version > that shows the same performance benefit (better, actually). > It's much simpler than the previous one. Instead of keeping > a bitmap, it simply uses a u32 in the gfs2_inode to keep > track of where it's last read-ahead. That avoids a lot of > the issues you wrote about. > > I couldn't use the file struct because of the way function > gfs2_dir_read is called from NFS. > Thanks for fixing that up... it looks much simpler now. It's a pity about the NFS issue. We still need to figure out how to reset the readahead index correctly though, but I think we can leave that for a future patch. One possible solution would be to reset it on lseek(SEEK_SET, 0) for example, or if the readahead index is miles away from the actual index,
Steve. > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson <rpete...@redhat.com> > -- > fs/gfs2/dir.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/gfs2/incore.h | 1 + > 2 files changed, 51 insertions(+), 0 deletions(-) > > diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c > index 2045d70..31888bd 100644 > --- a/fs/gfs2/dir.c > +++ b/fs/gfs2/dir.c > @@ -76,6 +76,8 @@ > #define IS_LEAF 1 /* Hashed (leaf) directory */ > #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ > > +#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */ > + > #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) > #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) > > @@ -345,6 +347,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode > *ip) > if (hc) > return hc; > > + ip->i_ra_index = 0; > hsize = 1 << ip->i_depth; > hsize *= sizeof(__be64); > if (hsize != i_size_read(&ip->i_inode)) { > @@ -382,6 +385,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode > *ip) > void gfs2_dir_hash_inval(struct gfs2_inode *ip) > { > __be64 *hc = ip->i_hash_cache; > + ip->i_ra_index = 0; > ip->i_hash_cache = NULL; > kfree(hc); > } > @@ -1377,6 +1381,50 @@ out: > } > > > +/* gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. > + * > + * Note: we can't calculate each index like dir_e_read can because we don't > + * have the leaf, and therefore we don't have the depth, and therefore we > + * don't have the length. So we have to just read enough ahead to make up > + * for the loss of information. */ > +static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 > index) > +{ > + struct gfs2_inode *ip = GFS2_I(inode); > + struct gfs2_glock *gl = ip->i_gl; > + struct buffer_head *bh; > + u64 blocknr = 0, last; > + unsigned count; > + > + /* First check if we've already read-ahead for the whole range. 
*/ > + if (index + MAX_RA_BLOCKS < ip->i_ra_index) > + return; > + > + ip->i_ra_index = max(index, ip->i_ra_index); > + for (count = 0; count < MAX_RA_BLOCKS; count++) { > + if (ip->i_ra_index >= hsize) /* if exceeded the hash table */ > + break; > + > + last = blocknr; > + blocknr = be64_to_cpu(ip->i_hash_cache[ip->i_ra_index]); > + ip->i_ra_index++; > + if (blocknr == last) > + continue; > + > + bh = gfs2_getbuf(gl, blocknr, 1); > + if (trylock_buffer(bh)) { > + if (buffer_uptodate(bh)) { > + unlock_buffer(bh); > + brelse(bh); > + continue; > + } > + bh->b_end_io = end_buffer_read_sync; > + submit_bh(READA | REQ_META, bh); > + continue; > + } > + brelse(bh); > + } > +} > + > /** > * dir_e_read - Reads the entries from a directory into a filldir buffer > * @dip: dinode pointer > @@ -1406,6 +1454,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, > void *opaque, > if (IS_ERR(lp)) > return PTR_ERR(lp); > > + gfs2_dir_readahead(inode, hsize, index); > + > while (index < hsize) { > error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, > &copied, &depth, > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index 892ac37..50c3bcb 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -286,6 +286,7 @@ struct gfs2_inode { > struct rw_semaphore i_rw_mutex; > struct list_head i_trunc_list; > __be64 *i_hash_cache; > + u32 i_ra_index; /* read-ahead index */ > u32 i_entries; > u32 i_diskflags; > u8 i_height;