Hi,

On Fri, 2011-10-07 at 12:01 -0400, Bob Peterson wrote:
> Hi,
> 
> Thanks for the comments, Steve.  Here is another version
> that shows the same performance benefit (better, actually).
> It's much simpler than the previous one.  Instead of keeping
> a bitmap, it simply uses a u32 in the gfs2_inode to keep
> track of where it's last read-ahead.  That avoids a lot of
> the issues you wrote about.
> 
> I couldn't use the file struct because of the way function
> gfs2_dir_read is called from NFS.
> 
Thanks for fixing that up... it looks much simpler now. It's a pity about
the NFS issue. We still need to figure out how to reset the readahead
index correctly, but I think we can leave that for a future patch. One
possible solution would be to reset it on lseek(SEEK_SET, 0), for
example, or if the readahead index is miles away from the actual index.

Steve.

> Regards,
> 
> Bob Peterson
> Red Hat File Systems
> 
> Signed-off-by: Bob Peterson <rpete...@redhat.com> 
> --
>  fs/gfs2/dir.c    |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/gfs2/incore.h |    1 +
>  2 files changed, 51 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
> index 2045d70..31888bd 100644
> --- a/fs/gfs2/dir.c
> +++ b/fs/gfs2/dir.c
> @@ -76,6 +76,8 @@
>  #define IS_LEAF     1 /* Hashed (leaf) directory */
>  #define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
>  
> +#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
> +
>  #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
>  #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
>  
> @@ -345,6 +347,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode 
> *ip)
>       if (hc)
>               return hc;
>  
> +     ip->i_ra_index = 0;
>       hsize = 1 << ip->i_depth;
>       hsize *= sizeof(__be64);
>       if (hsize != i_size_read(&ip->i_inode)) {
> @@ -382,6 +385,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode 
> *ip)
>  void gfs2_dir_hash_inval(struct gfs2_inode *ip)
>  {
>       __be64 *hc = ip->i_hash_cache;
> +     ip->i_ra_index = 0;
>       ip->i_hash_cache = NULL;
>       kfree(hc);
>  }
> @@ -1377,6 +1381,50 @@ out:
>  }
>  
> 
> +/* gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
> + *
> + * Note: we can't calculate each index like dir_e_read can because we don't
> + * have the leaf, and therefore we don't have the depth, and therefore we
> + * don't have the length. So we have to just read enough ahead to make up
> + * for the loss of information. */
> +static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 
> index)
> +{
> +     struct gfs2_inode *ip = GFS2_I(inode);
> +     struct gfs2_glock *gl = ip->i_gl;
> +     struct buffer_head *bh;
> +     u64 blocknr = 0, last;
> +     unsigned count;
> +
> +     /* First check if we've already read-ahead for the whole range. */
> +     if (index + MAX_RA_BLOCKS < ip->i_ra_index)
> +             return;
> +
> +     ip->i_ra_index = max(index, ip->i_ra_index);
> +     for (count = 0; count < MAX_RA_BLOCKS; count++) {
> +             if (ip->i_ra_index >= hsize) /* if exceeded the hash table */
> +                     break;
> +
> +             last = blocknr;
> +             blocknr = be64_to_cpu(ip->i_hash_cache[ip->i_ra_index]);
> +             ip->i_ra_index++;
> +             if (blocknr == last)
> +                     continue;
> +
> +             bh = gfs2_getbuf(gl, blocknr, 1);
> +             if (trylock_buffer(bh)) {
> +                     if (buffer_uptodate(bh)) {
> +                             unlock_buffer(bh);
> +                             brelse(bh);
> +                             continue;
> +                     }
> +                     bh->b_end_io = end_buffer_read_sync;
> +                     submit_bh(READA | REQ_META, bh);
> +                     continue;
> +             }
> +             brelse(bh);
> +     }
> +}
> +
>  /**
>   * dir_e_read - Reads the entries from a directory into a filldir buffer
>   * @dip: dinode pointer
> @@ -1406,6 +1454,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, 
> void *opaque,
>       if (IS_ERR(lp))
>               return PTR_ERR(lp);
>  
> +     gfs2_dir_readahead(inode, hsize, index);
> +
>       while (index < hsize) {
>               error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
>                                          &copied, &depth,
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 892ac37..50c3bcb 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -286,6 +286,7 @@ struct gfs2_inode {
>       struct rw_semaphore i_rw_mutex;
>       struct list_head i_trunc_list;
>       __be64 *i_hash_cache;
> +     u32 i_ra_index; /* read-ahead index */
>       u32 i_entries;
>       u32 i_diskflags;
>       u8 i_height;


Reply via email to