When a caller allocates multiple buffers with get_huge_pages(), all of the buffers are aligned to the start of a hugepage. This means the buffers almost certainly map to the same cache lines, and operations such as copying between the arrays will be mysteriously slow in comparison to buffers allocated from malloc(). In STREAM, allocating the three arrays using get_huge_pages() achieved approximately 40% of the throughput of malloc().
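To see the problem concretely, here is a minimal standalone sketch (not part of the patch, link with -lhugetlbfs) against the existing get_huge_pages() interface. Without this patch both offsets print 0, since every buffer begins on a hugepage boundary:

#include <stdio.h>
#include <hugetlbfs.h>

int main(void)
{
	long hpage_size = gethugepagesize();

	/* Two separate allocations, each rounded up to a full hugepage */
	void *a = get_huge_pages(hpage_size, GHP_DEFAULT);
	void *b = get_huge_pages(hpage_size, GHP_DEFAULT);

	if (a == NULL || b == NULL)
		return 1;

	/*
	 * Both buffers start at offset 0 within their hugepage, so the
	 * low-order address bits that select the cache set are identical
	 * for every buffer.
	 */
	printf("a offset within hugepage: %lu\n",
		(unsigned long)a % hpage_size);
	printf("b offset within hugepage: %lu\n",
		(unsigned long)b % hpage_size);

	free_huge_pages(a);
	free_huge_pages(b);
	return 0;
}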
This patch uses bytes that would otherwise be wasted to offset the buffer
by some random number of cache lines. An effort is made to select a random
cache line so that average performance is good.

Signed-off-by: Mel Gorman <[EMAIL PROTECTED]>
---
 alloc.c                |   66 ++++++++++++++++++++++++++++++++++++++++-------
 hugetlbfs.h            |    2 +
 man/get_huge_pages.3   |    7 +++++
 tests/get_huge_pages.c |    8 ++++++
 4 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/alloc.c b/alloc.c
index b87a60d..a00bf31 100644
--- a/alloc.c
+++ b/alloc.c
@@ -26,6 +26,7 @@
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/types.h>
+#include <time.h>
 
 #include "hugetlbfs.h"
 #include "libhugetlbfs_internal.h"
@@ -69,18 +70,26 @@ static void *fallback_base_pages(size_t len, ghp_t flags)
  * flags: Flags specifying the behaviour of the function
  *
  * This function allocates a region of memory backed by huge pages and
- * at least hugepage-aligned. This is not a suitable drop-in for malloc().
- * As the length is always aligned to a hugepage-boundary, on average
- * half a hugepage will be wasted unless care is taken. The intention is that
- * a malloc-like library uses this function to create additional heap similar
- * in principal to what morecore does for glibc malloc.
+ * at least hugepage-aligned. Care has to be taken when using this as
+ * a drop-in replacement for malloc(). As the length is always aligned to
+ * a hugepage-boundary, on average half a hugepage will be wasted unless
+ * care is taken. Wastage where it occurs is used to offset buffers by a
+ * random cacheline unless GHP_ALIGN is specified.
  */
 void *get_huge_pages(size_t len, ghp_t flags)
 {
 	void *buf;
 	int heap_fd;
 	size_t aligned_len, wasteage;
+	static long cacheline_size = 0;
+	static int linemod = 0;
+
+	/* Look up our cacheline size once, and select a semi-random line */
+	if (cacheline_size == 0) {
+		cacheline_size = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
+		linemod = time(NULL);
+	}
 
 	/* Create a file descriptor for the new region */
 	heap_fd = hugetlbfs_unlinked_fd();
 	if (heap_fd < 0) {
@@ -91,7 +100,7 @@ void *get_huge_pages(size_t len, ghp_t flags)
 	/* Align the len parameter */
 	aligned_len = ALIGN(len, gethugepagesize());
 	wasteage = aligned_len - len;
-	if (wasteage != 0)
+	if (wasteage != 0 && (flags & GHP_ALIGN))
 		DEBUG("get_huge_pages: Wasted %zd bytes due to alignment\n",
 			wasteage);
 
@@ -102,8 +111,10 @@ void *get_huge_pages(size_t len, ghp_t flags)
 		close(heap_fd);
 
 		/* Try falling back to base pages if allowed */
-		if (flags & GHP_FALLBACK)
-			return fallback_base_pages(aligned_len, flags);
+		if (flags & GHP_FALLBACK) {
+			buf = fallback_base_pages(aligned_len, flags);
+			goto offset;
+		}
 
 		WARNING("get_huge_pages: New region mapping failed (flags: 0x%lX): %s\n",
 			flags, strerror(errno));
@@ -116,8 +127,10 @@ void *get_huge_pages(size_t len, ghp_t flags)
 		close(heap_fd);
 
 		/* Try falling back to base pages if allowed */
-		if (flags & GHP_FALLBACK)
-			return fallback_base_pages(aligned_len, flags);
+		if (flags & GHP_FALLBACK) {
+			buf = fallback_base_pages(aligned_len, flags);
+			goto offset;
+		}
 	}
 
 	/* Close the file so we do not have to track the descriptor */
@@ -127,6 +140,34 @@ void *get_huge_pages(size_t len, ghp_t flags)
 		return NULL;
 	}
 
+offset:
+	/*
+	 * Offset the buffer using wasted bytes to avoid using the same
+	 * cache lines for the start of every buffer returned by
+	 * get_huge_pages(). A small effort is made to select a random
+	 * cacheline rather than sequential ones to give decent behaviour
+	 * on average.
+	 */
+	if (!(flags & GHP_ALIGN)) {
+		char *bytebuf = (char *)buf;
+		int numlines = wasteage / cacheline_size;
+		int line = 0;
+
+		DEBUG("%d lines of cacheline size %ld due to %zd wastage\n",
+			numlines, cacheline_size, wasteage);
+
+		if (numlines) {
+			line = linemod % numlines;
+			bytebuf += cacheline_size * line;
+
+			/* Pseudo-ish random line selection */
+			linemod += len % numlines;
+		}
+		DEBUG("Using line offset %d from start\n", line);
+
+		buf = (void *)bytebuf;
+	}
+
 	/* woo, new buffer of shiny */
 	return buf;
 }
@@ -147,6 +188,11 @@ void free_huge_pages(void *ptr)
 	char line[MAPS_BUF_SZ];
 	unsigned long start = 0, end = 0;
 
+	/* Buffers may be offset for cache line coloring */
+	DEBUG("free_huge_pages(%p) unaligned\n", ptr);
+	ptr = (void *)ALIGN_DOWN((unsigned long)ptr, gethugepagesize());
+	DEBUG("free_huge_pages(%p) aligned\n", ptr);
+
 	/*
 	 * /proc/self/maps is used to determine the length of the original
 	 * allocation. As mappings are based on different files, we can
diff --git a/hugetlbfs.h b/hugetlbfs.h
index 0694a0b..3e2647c 100644
--- a/hugetlbfs.h
+++ b/hugetlbfs.h
@@ -46,9 +46,11 @@ int hugetlbfs_unlinked_fd_for_size(long page_size);
  *	by the current implementation of the library
  * GHP_FALLBACK - Use the default hugepage size if possible but fallback to
  *	smaller pages if necessary
+ * GHP_ALIGN - Always align the returned buffer to the start of the hugepage
  */
 typedef unsigned long ghp_t;
 #define GHP_FALLBACK	(0x01UL)
+#define GHP_ALIGN	(0x02UL)
 #define GHP_DEFAULT	(0)
 
 /* Direct alloc functions */
diff --git a/man/get_huge_pages.3 b/man/get_huge_pages.3
index a2173f8..2dcc1e4 100644
--- a/man/get_huge_pages.3
+++ b/man/get_huge_pages.3
@@ -47,6 +47,13 @@ Allocate a region of memory backed by hugepages. If sufficient hugepages
 are not available, return an MAP_ANONYMOUS region of memory backed by
 base page-sized pages instead.
 
+.TP
+.B GHP_ALIGN
+Always align the returned buffer to the start of the hugepage. By default,
+the allocator will use wasted bytes due to alignment to offset the buffer.
+This offset prevents the start of buffers always sharing the same cache
+color but may be undesirable for callers that expect page-aligned buffers.
+
 .PP
 
 \fBfree_huge_pages()\fP frees a region of memory allocated by
diff --git a/tests/get_huge_pages.c b/tests/get_huge_pages.c
index 62b27ea..a7a5f98 100644
--- a/tests/get_huge_pages.c
+++ b/tests/get_huge_pages.c
@@ -60,6 +60,14 @@ void test_get_huge_pages(int num_hugepages)
 	err = test_addr_huge(p);
 	if (err == 1)
 		FAIL("hugepage was not correctly freed");
+
+	/* We should be able to alloc/free unaligned lengths */
+	p = get_huge_pages((num_hugepages - 1) * hpage_size + hpage_size / 2,
+							GHP_DEFAULT);
+	if (p == NULL)
+		FAIL("test unaligned allocation failed for %ld hugepages",
+			num_hugepages);
+	free_and_confirm_region_free(p, __LINE__);
 }
 
 void test_GHP_FALLBACK(void)
-- 
1.5.6.5
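For completeness, a sketch of how callers see the new behaviour, assuming the patch above is applied (illustrative only; the length is deliberately unaligned so there is wastage to colour with):

#include <stdio.h>
#include <hugetlbfs.h>

int main(void)
{
	long hpage_size = gethugepagesize();

	/* Unaligned length: half a hugepage of wastage is available */
	size_t len = hpage_size + hpage_size / 2;

	/* Default behaviour: start may be offset by whole cache lines */
	void *colored = get_huge_pages(len, GHP_DEFAULT);

	/* New flag: opt back in to strict hugepage alignment */
	void *aligned = get_huge_pages(len, GHP_ALIGN);

	if (colored == NULL || aligned == NULL)
		return 1;

	printf("colored offset: %lu\n", (unsigned long)colored % hpage_size);
	printf("aligned offset: %lu\n", (unsigned long)aligned % hpage_size);

	/*
	 * free_huge_pages() aligns the pointer back down to the hugepage
	 * boundary, so coloured buffers are freed the same way as strictly
	 * aligned ones.
	 */
	free_huge_pages(colored);
	free_huge_pages(aligned);
	return 0;
}

The aligned offset is always 0; the coloured offset is some multiple of the L2 cache line size within the wasted region.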