On Mon, 2008-11-10 at 11:55 +0000, Mel Gorman wrote:
> In the situation where a caller allocates multiple buffers with
> get_hugepage_region(), all the buffers will be aligned to the start of a
> hugepage. This means the buffers are almost certainly using the same
> cachelines, and operations like copying between the arrays will be mysteriously
> slow in comparison to buffers allocated from malloc(). In STREAM, allocating
> the three arrays using get_huge_pages() gave approximately 40% of the
> throughput of malloc().
> 
> When requested, this patch uses bytes that would normally be wasted to
> offset the buffer by some random cacheline. Effort is made to select a
> random cache-line to give good average performance.
> 
> Signed-off-by: Mel Gorman <[EMAIL PROTECTED]>

Acked-by: Adam Litke <[EMAIL PROTECTED]>

> ---
>  alloc.c                   |   49 +++++++++++++++++++++++++++++++++++++++++++-
>  hugetlbfs.h               |    8 +++++-
>  man/get_hugepage_region.3 |    9 +++++++-
>  3 files changed, 61 insertions(+), 5 deletions(-)
> 
> diff --git a/alloc.c b/alloc.c
> index 74bb5a4..4de75b5 100644
> --- a/alloc.c
> +++ b/alloc.c
> @@ -24,6 +24,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <unistd.h>
> +#include <time.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
> 
> @@ -179,6 +180,41 @@ void free_huge_pages(void *ptr)
>       fclose(fd);
>  }
> 
> +/*
> + * Offset the buffer using bytes wasted due to alignment to avoid using the
> + * same cache lines for the start of every buffer returned by
> + * get_huge_pages(). A small effort is made to select a random cacheline
> + * rather than sequential lines to give decent behaviour on average.
> + */
> +void *cachecolor(void *buf, size_t len, size_t color_bytes)
> +{
> +     static long cacheline_size = 0;
> +     static int linemod = 0;
> +     char *bytebuf = (char *)buf;
> +     int numlines;
> +     int line = 0;
> +
> +     /* Lookup our cacheline size once */
> +     if (cacheline_size == 0) {
> +             cacheline_size = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
> +             linemod = time(NULL);
> +     }
> +
> +     numlines = color_bytes / cacheline_size;
> +     DEBUG("%d lines of cacheline size %ld due to %zd wastage\n",
> +             numlines, cacheline_size, color_bytes);
> +     if (numlines) {
> +             line = linemod % numlines;
> +             bytebuf += cacheline_size * line;
> +
> +             /* Pseudo-ish random line selection */
> +             linemod += len % numlines;
> +     }
> +     DEBUG("Using line offset %d from start\n", line);
> +
> +     return bytebuf;
> +}
> +
>  /**
>   * get_hugepage_region - Allocate an amount of memory backed by huge pages
>   *
> @@ -208,12 +244,16 @@ void *get_hugepage_region(size_t len, ghr_t flags)
>               buf = fallback_base_pages(len, flags);
>       }
> 
> -     /* Calculate wastage */
> +     /* Calculate wastage for coloring */
>       wastage = aligned_len - len;
> -     if (wastage != 0)
> +     if (wastage != 0 && !(flags & GHR_COLOR))
>               DEBUG("get_hugepage_region: Wasted %zd bytes due to 
> alignment\n",
>                       wastage);
> 
> +     /* Only colour if requested */
> +     if (flags & GHR_COLOR)
> +             buf = cachecolor(buf, len, wastage);
> +
>       return buf;
>  }
> 
> @@ -228,5 +268,10 @@ void *get_hugepage_region(size_t len, ghr_t flags)
>   */
>  void free_hugepage_region(void *ptr)
>  {
> +     /* Buffers may be offset for cache line coloring */
> +     DEBUG("free_hugepage_region(%p) unaligned\n", ptr);
> +     ptr = (void *)ALIGN_DOWN((unsigned long)ptr, gethugepagesize());
> +     DEBUG("free_hugepage_region(%p) aligned\n", ptr);
> +
>       free_huge_pages(ptr);
>  }
> diff --git a/hugetlbfs.h b/hugetlbfs.h
> index ebb676c..ecd178b 100644
> --- a/hugetlbfs.h
> +++ b/hugetlbfs.h
> @@ -60,13 +60,17 @@ void free_huge_pages(void *ptr);
>   * GHR_FALLBACK - Use the default hugepage size if possible but fallback to
>   *             smaller pages if necessary
>   * GHR_STRICT   - Use hugepages of some size or return NULL
> + * GHR_COLOR    - Use bytes wasted due to alignment to offset the buffer
> + *             by a random cache line. This gives better average
> + *             performance with many buffers
>   */
>  typedef unsigned long ghr_t;
>  #define GHR_STRICT   ((ghr_t)0x10000000U)
>  #define GHR_FALLBACK ((ghr_t)0x20000000U)
> -#define GHR_DEFAULT  GHR_FALLBACK
> +#define GHR_COLOR    ((ghr_t)0x40000000U)
> +#define GHR_DEFAULT  (GHR_FALLBACK|GHR_COLOR)
> 
> -#define GHR_MASK     (GHR_FALLBACK|GHR_STRICT)
> +#define GHR_MASK     (GHR_FALLBACK|GHR_STRICT|GHR_COLOR)
> 
>  /* Allocation functions for regions backed by hugepages */
>  void *get_hugepage_region(size_t len, ghr_t flags);
> diff --git a/man/get_hugepage_region.3 b/man/get_hugepage_region.3
> index ce0b018..88fd940 100644
> --- a/man/get_hugepage_region.3
> +++ b/man/get_hugepage_region.3
> @@ -47,11 +47,18 @@ Use base pages if there are an insufficient number of huge pages.
>  .B GHR_STRICT
>  Use hugepages or return NULL.
> 
> +.B GHR_COLOR
> +When specified, bytes that would be wasted due to alignment are used to
> +color the buffer by offsetting it by a random cacheline within the hugepage.
> +This avoids a performance problem whereby multiple buffers use the same
> +cache lines at the same offsets. If it is not important that the start of the
> +buffer be page-aligned, specify this flag.
> +
>  .B GHR_DEFAULT
> 
>  The library chooses a sensible combination of flags for allocating a region of
>  memory. The current default is:
> -     GHR_FALLBACK
> +     GHR_FALLBACK | GHR_COLOR
> 
>  .PP
> 
-- 
Adam Litke - (agl at us.ibm.com)
IBM Linux Technology Center


-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to