Hi,

That's a worthwhile improvement. Now in the -nmw git tree. Thanks,

Steve.

On Mon, 2008-03-10 at 18:17 -0500, Bob Peterson wrote:
> Hi,
> 
> This version of the gfs2_bitfit algorithm includes the latest
> suggestions from Steve Whitehouse.  It is typically eight to
> ten times faster than the version we're using today.  If there
> is a lot of metadata mixed in (lots of small files), the
> algorithm is often 15 times faster, and given the right
> conditions, I've seen peaks of 20 times faster.
> 
> Regards,
> 
> Bob Peterson
> 
> Signed-off-by: Bob Peterson <[EMAIL PROTECTED]>
> --
>  fs/gfs2/rgrp.c |   93 ++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 61 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 4291375..7e8f0b1 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -14,6 +14,7 @@
>  #include <linux/fs.h>
>  #include <linux/gfs2_ondisk.h>
>  #include <linux/lm_interface.h>
> +#include <linux/prefetch.h>
>  
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -33,6 +34,16 @@
>  #define BFITNOENT ((u32)~0)
>  #define NO_BLOCK ((u64)~0)
>  
> +#if BITS_PER_LONG == 32
> +#define LBITMASK   (0x55555555UL)
> +#define LBITSKIP55 (0x55555555UL)
> +#define LBITSKIP00 (0x00000000UL)
> +#else
> +#define LBITMASK   (0x5555555555555555UL)
> +#define LBITSKIP55 (0x5555555555555555UL)
> +#define LBITSKIP00 (0x0000000000000000UL)
> +#endif
> +
>  /*
>   * These routines are used by the resource group routines (rgrp.c)
>   * to keep track of block allocation.  Each block is represented by two
> @@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
>  static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
>                      u8 old_state)
>  {
> -     const u8 *byte;
> -     u32 blk = goal;
> -     unsigned int bit, bitlong;
> -     const unsigned long *plong;
> -#if BITS_PER_LONG == 32
> -     const unsigned long plong55 = 0x55555555;
> -#else
> -     const unsigned long plong55 = 0x5555555555555555;
> -#endif
> -
> -     byte = buffer + (goal / GFS2_NBBY);
> -     plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
> -     bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> -     bitlong = bit;
> -
> -     while (byte < buffer + buflen) {
> -
> -             if (bitlong == 0 && old_state == 0 && *plong == plong55) {
> -                     plong++;
> -                     byte += sizeof(unsigned long);
> -                     blk += sizeof(unsigned long) * GFS2_NBBY;
> -                     continue;
> +     const u8 *byte, *start, *end;
> +     int bit, startbit;
> +     u32 g1, g2, misaligned;
> +     unsigned long *plong;
> +     unsigned long lskipval;
> +
> +     lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
> +     g1 = (goal / GFS2_NBBY);
> +     start = buffer + g1;
> +     byte = start;
> +     end = buffer + buflen;
> +     g2 = ALIGN(g1, sizeof(unsigned long));
> +     plong = (unsigned long *)(buffer + g2);
> +     startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> +     misaligned = g2 - g1;
> +     if (!misaligned)
> +             goto ulong_aligned;
> +/* parse the bitmap a byte at a time */
> +misaligned:
> +     while (byte < end) {
> +             if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
> +                     return goal +
> +                             (((byte - start) * GFS2_NBBY) +
> +                              ((bit - startbit) >> 1));
>               }
> -             if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
> -                     return blk;
>               bit += GFS2_BIT_SIZE;
> -             if (bit >= 8) {
> +             if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
>                       bit = 0;
>                       byte++;
> +                     misaligned--;
> +                     if (!misaligned) {
> +                             plong = (unsigned long *)byte;
> +                             goto ulong_aligned;
> +                     }
>               }
> -             bitlong += GFS2_BIT_SIZE;
> -             if (bitlong >= sizeof(unsigned long) * 8) {
> -                     bitlong = 0;
> -                     plong++;
> -             }
> -
> -             blk++;
>       }
> +     return BFITNOENT;
>  
> +/* parse the bitmap an unsigned long at a time */
> +ulong_aligned:
> +     /* Stop at "end - 1" or else prefetch can go past the end and segfault.
> +        We could "if" it but we'd lose some of the performance gained.
> +        This way will only slow down searching the very last 4/8 bytes
> +        depending on architecture.  I've experimented with several ways
> +        of writing this section such as using an else before the goto
> +        but this one seems to be the fastest. */
> +     while ((unsigned char *)plong < end - 1) {
> +             prefetch(plong + 1);
> +             if (((*plong) & LBITMASK) != lskipval)
> +                     break;
> +             plong++;
> +     }
> +     if ((unsigned char *)plong < end) {
> +             byte = (const u8 *)plong;
> +             misaligned += sizeof(unsigned long) - 1;
> +             goto misaligned;
> +     }
>       return BFITNOENT;
>  }
>  
> 
> 
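
For anyone who wants to try the core trick in isolation, below is a small stand-alone, user-space sketch of the word-at-a-time skip the patch introduces. It is only an illustration under simplifying assumptions: the identifiers (find_state, NBBY, BIT_SIZE, BLKST_FREE, BLKST_USED) are local to the example rather than the kernel's, the buffer is assumed to be unsigned-long aligned with a length that is a multiple of sizeof(unsigned long) (the real gfs2_bitfit walks the misaligned head and tail a byte at a time), and the prefetch() hint is left out. The principle is the same: ANDing a word with 0x55...55 keeps the low bit of every two-bit block state, so a word whose masked value equals the skip value cannot contain the state being searched for and can be stepped over without testing its individual fields.

/*
 * bitfit-sketch.c - stand-alone illustration of the gfs2_bitfit skip.
 * Build:  cc -O2 -o bitfit-sketch bitfit-sketch.c
 * Note: the names below are illustrative, not the kernel's.
 */
#include <limits.h>
#include <stdio.h>
#include <string.h>

#define BIT_SIZE   2                    /* two bits of state per block   */
#define NBBY       4                    /* block states packed per byte  */
#define BIT_MASK   0x3
#define BLKST_FREE 0
#define BLKST_USED 1

#if ULONG_MAX == 0xffffffffUL
#define LBITMASK   0x55555555UL
#define LBITSKIP55 0x55555555UL
#define LBITSKIP00 0x00000000UL
#else
#define LBITMASK   0x5555555555555555UL
#define LBITSKIP55 0x5555555555555555UL
#define LBITSKIP00 0x0000000000000000UL
#endif

/*
 * Return the index of the first block whose two-bit state equals
 * old_state, or (unsigned int)-1 if there is none.  The buffer must be
 * unsigned-long aligned and len a multiple of sizeof(unsigned long).
 */
static unsigned int find_state(const unsigned char *buf, unsigned int len,
			       unsigned char old_state)
{
	unsigned long skip = (old_state & BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
	const unsigned long *word = (const unsigned long *)buf;
	const unsigned long *wend = word + len / sizeof(unsigned long);
	unsigned int byte, bit;

	/* Fast path: step over whole words that cannot hold old_state. */
	while (word < wend && (*word & LBITMASK) == skip)
		word++;

	/* Finish byte by byte; the mask test is only a coarse filter. */
	for (byte = (const unsigned char *)word - buf; byte < len; byte++)
		for (bit = 0; bit < NBBY * BIT_SIZE; bit += BIT_SIZE)
			if (((buf[byte] >> bit) & BIT_MASK) == old_state)
				return byte * NBBY + bit / BIT_SIZE;
	return (unsigned int)-1;
}

int main(void)
{
	unsigned long storage[64 / sizeof(unsigned long)];	/* aligned */
	unsigned char *bitmap = (unsigned char *)storage;

	memset(bitmap, 0x55, sizeof(storage));	/* every block "used"  */
	bitmap[40] &= ~(0x3 << 4);		/* free block 162 only */
	printf("first free block: %u\n",
	       find_state(bitmap, sizeof(storage), BLKST_FREE));
	return 0;
}

Built and run, it should print "first free block: 162". The same masking applies when searching for used or dinode blocks: the skip value becomes 0x00...00, so words containing only free or unlinked states are the ones stepped over.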
