The get_huge_pages() API is a close-to-kernel interface for the direct allocation of hugepages. This forces the caller to deal with alignment and to fall back to base pages where suitable. For the casual user of hugepages who does not care about such things, this patch adds get_hugepage_region(). It allocates regions of memory that are backed by hugepages where possible, but callers are not required to align the requested length and can request fallback to base pages.
Signed-off-by: Mel Gorman <[EMAIL PROTECTED]> Acked-by: Adam Litke <[EMAIL PROTECTED]> --- Makefile | 5 +- alloc.c | 58 +++++++++++++++++++- hugetlbfs.h | 23 +++++++- man/get_huge_pages.3 | 2 + man/get_hugepage_region.3 | 79 ++++++++++++++++++++++++++ tests/Makefile | 4 +- tests/get_hugepage_region.c | 129 +++++++++++++++++++++++++++++++++++++++++++ tests/run_tests.sh | 3 + version.lds | 2 + 9 files changed, 300 insertions(+), 5 deletions(-) create mode 100644 man/get_hugepage_region.3 create mode 100644 tests/get_hugepage_region.c diff --git a/Makefile b/Makefile index 4554154..40c8c45 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,8 @@ BIN_OBJ_DIR=obj INSTALL_BIN = hugectl hugeedit hugeadm pagesize INSTALL_HEADERS = hugetlbfs.h INSTALL_MAN1 = pagesize.1 -INSTALL_MAN3 = get_huge_pages.3 gethugepagesizes.3 getpagesizes.3 +INSTALL_MAN3 = get_huge_pages.3 get_hugepage_region.3 \ + gethugepagesizes.3 getpagesizes.3 INSTALL_MAN7 = libhugetlbfs.7 INSTALL_MAN8 = hugectl.8 hugeedit.8 hugeadm.8 LDSCRIPT_TYPES = B BDT @@ -379,7 +380,9 @@ install-man: gzip -f $(DESTDIR)$(MANDIR3)/$$x; \ done rm -f $(DESTDIR)$(MANDIR3)/free_huge_pages.3.gz + rm -f $(DESTDIR)$(MANDIR3)/free_hugepage_region.3.gz ln -s get_huge_pages.3.gz $(DESTDIR)$(MANDIR3)/free_huge_pages.3.gz + ln -s get_hugepage_region.3.gz $(DESTDIR)$(MANDIR3)/free_hugepage_region.3.gz for x in $(INSTALL_MAN7); do \ $(INSTALL) -m 444 man/$$x $(DESTDIR)$(MANDIR7); \ gzip -f $(DESTDIR)$(MANDIR7)/$$x; \ diff --git a/alloc.c b/alloc.c index 6e026c5..74bb5a4 100644 --- a/alloc.c +++ b/alloc.c @@ -35,7 +35,7 @@ static void *fallback_base_pages(size_t len, ghp_t flags) { int fd; void *buf; - DEBUG("get_huge_pages: Falling back to base pages\n"); + DEBUG("get_hugepage_region: Falling back to base pages\n"); /* * Map /dev/zero instead of MAP_ANONYMOUS avoid VMA mergings. 
Freeing @@ -78,6 +78,10 @@ void *get_huge_pages(size_t len, ghp_t flags) void *buf; int heap_fd; + /* Catch an altogether-too easy typo */ + if (flags & GHR_MASK) + ERROR("Improper use of GHR_* in get_huge_pages()\n"); + /* Create a file descriptor for the new region */ heap_fd = hugetlbfs_unlinked_fd(); if (heap_fd < 0) { @@ -174,3 +178,55 @@ void free_huge_pages(void *ptr) fclose(fd); } + +/** + * get_hugepage_region - Allocate an amount of memory backed by huge pages + * + * len: Size of the region to allocate + * flags: Flags specifying the behaviour of the function + * + * This function allocates a region of memory backed by huge pages. Care should + * be taken when using this function as a drop-in replacement for malloc() as + * memory can be wasted if the length is not hugepage-aligned. This function + * is more relaxed than get_huge_pages() in that it allows fallback to small + * pages when requested. + */ +void *get_hugepage_region(size_t len, ghr_t flags) +{ + size_t aligned_len, wastage; + void *buf; + + /* Catch an altogether-too easy typo */ + if (flags & GHP_MASK) + ERROR("Improper use of GHP_* in get_hugepage_region()\n"); + + /* Align the len parameter to a hugepage boundary and allocate */ + aligned_len = ALIGN(len, gethugepagesize()); + buf = get_huge_pages(aligned_len, GHP_DEFAULT); + if (buf == NULL && (flags & GHR_FALLBACK)) { + aligned_len = ALIGN(len, getpagesize()); + buf = fallback_base_pages(len, flags); + } + + /* Calculate wastage */ + wastage = aligned_len - len; + if (wastage != 0) + DEBUG("get_hugepage_region: Wasted %zd bytes due to alignment\n", + wastage); + + return buf; +} + +/** + * free_hugepage_region - Free a region allocated by get_hugepage_region + * ptr - The pointer to the buffer returned by get_hugepage_region + * + * This function finds a region to free based on the contents of + * /proc/pid/maps. The assumption is made that the ptr is the start of + * a hugepage region allocated with get_hugepage_region. 
No checking is made + * that the pointer is to a hugepage backed region. + */ +void free_hugepage_region(void *ptr) +{ + free_huge_pages(ptr); +} diff --git a/hugetlbfs.h b/hugetlbfs.h index 0efa02c..ebb676c 100644 --- a/hugetlbfs.h +++ b/hugetlbfs.h @@ -46,9 +46,30 @@ int hugetlbfs_unlinked_fd_for_size(long page_size); */ typedef unsigned long ghp_t; #define GHP_DEFAULT ((ghp_t)0x01UL) +#define GHP_MASK (GHP_DEFAULT) -/* Direct alloc functions */ +/* Direct alloc functions for hugepages */ void *get_huge_pages(size_t len, ghp_t flags); void free_huge_pages(void *ptr); +/* + * Region alloc flags and types + * + * GHR_DEFAULT - Use a combination of flags deemed to be a sensible default + * by the current implementation of the library + * GHR_FALLBACK - Use the default hugepage size if possible but fallback to + * smaller pages if necessary + * GHR_STRICT - Use hugepages of some size or return NULL + */ +typedef unsigned long ghr_t; +#define GHR_STRICT ((ghr_t)0x10000000U) +#define GHR_FALLBACK ((ghr_t)0x20000000U) +#define GHR_DEFAULT GHR_FALLBACK + +#define GHR_MASK (GHR_FALLBACK|GHR_STRICT) + +/* Allocation functions for regions backed by hugepages */ +void *get_hugepage_region(size_t len, ghr_t flags); +void free_hugepage_region(void *ptr); + #endif /* _HUGETLBFS_H */ diff --git a/man/get_huge_pages.3 b/man/get_huge_pages.3 index 69b6a65..86d03c9 100644 --- a/man/get_huge_pages.3 +++ b/man/get_huge_pages.3 @@ -64,6 +64,8 @@ mmap() was due to. 
, .I gethugepagesize(3) , +.I get_hugepage_region(3) +, .I libhugetlbfs(7) .SH AUTHORS libhugetlbfs was written by various people on the libhugetlbfs-devel diff --git a/man/get_hugepage_region.3 b/man/get_hugepage_region.3 new file mode 100644 index 0000000..ce0b018 --- /dev/null +++ b/man/get_hugepage_region.3 @@ -0,0 +1,79 @@ +.\" Hey, EMACS: -*- nroff -*- +.\" First parameter, NAME, should be all caps +.\" Second parameter, SECTION, should be 1-8, maybe w/ subsection +.\" other parameters are allowed: see man(7), man(1) +.TH GET_HUGEPAGE_REGION 3 "November 7, 2008" +.\" Please adjust this date whenever revising the manpage. +.\" +.\" Some roff macros, for reference: +.\" .nh disable hyphenation +.\" .hy enable hyphenation +.\" .ad l left justify +.\" .ad b justify to both left and right margins +.\" .nf disable filling +.\" .fi enable filling +.\" .br insert line break +.\" .sp <n> insert n+1 empty lines +.\" for manpage-specific macros, see man(7) +.SH NAME +get_hugepage_region, free_hugepage_region \- Allocate and free regions of memory that use hugepages where possible +.SH SYNOPSIS +.B #include <hugetlbfs.h> +.br + +.br +.B void *get_hugepage_region(size_t len, ghr_t flags); +.br +.B void free_hugepage_region(void *ptr); +.SH DESCRIPTION + +\fBget_hugepage_region()\fP allocates a memory region \fBlen\fP bytes in size +backed by hugepages. Hugepages may be of benefit to applications that use +large amounts of address space and suffer a performance hit due to TLB +misses. Wall-clock time or oprofile can be used to determine if there is +a performance benefit from using hugepages or not. + +Unlike \fBget_huge_pages()\fP, \fBlen\fP does not have to be hugepage-aligned +although memory may be wasted due to alignment. The caller may also specify +that base pages be used in the event there are no hugepages available. + +The \fBflags\fP argument changes the behaviour of the function. Flags may +be or'd together. 
+ +.TP +.B GHR_FALLBACK +Use base pages if there is an insufficient number of huge pages. + +.B GHR_STRICT +Use hugepages or return NULL. + +.B GHR_DEFAULT + +The library chooses a sensible combination of flags for allocating a region of +memory. The current default is: + GHR_FALLBACK + +.PP + +\fBfree_hugepage_region()\fP frees a region of memory allocated by +\fBget_hugepage_region()\fP. The behaviour of the function if another +pointer is used, valid or otherwise, is undefined. + +.SH RETURN VALUE + +On success, a pointer to the allocated memory is returned. On +error, NULL is returned. errno will be set based on what the failure of +mmap() was due to. + +.SH SEE ALSO +.I oprofile(1) +, +.I gethugepagesize(3) +, +.I get_huge_pages(3) +, +.I libhugetlbfs(7) +.SH AUTHORS +libhugetlbfs was written by various people on the libhugetlbfs-devel +mailing list. + diff --git a/tests/Makefile b/tests/Makefile index 009f75f..4313084 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -7,8 +7,8 @@ LIB_TESTS = gethugepagesize test_root find_path unlinked_fd misalign \ truncate_reserve_wraparound truncate_sigbus_versus_oom \ map_high_truncate_2 truncate_above_4GB direct \ misaligned_offset brk_near_huge task-size-overrun stack_grow_into_huge \ - counters quota heap-overflow get_huge_pages shmoverride_linked \ - gethugepagesizes + counters quota heap-overflow get_huge_pages get_hugepage_region \ + shmoverride_linked gethugepagesizes LIB_TESTS_64 = straddle_4GB huge_at_4GB_normal_below \ huge_below_4GB_normal_above NOLIB_TESTS = malloc malloc_manysmall dummy heapshrink shmoverride_unlinked diff --git a/tests/get_hugepage_region.c b/tests/get_hugepage_region.c new file mode 100644 index 0000000..81428e4 --- /dev/null +++ b/tests/get_hugepage_region.c @@ -0,0 +1,129 @@ +/* + * libhugetlbfs - Easy use of Linux hugepages + * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> + +#include <hugetlbfs.h> + +#include "hugetests.h" + +long hpage_size; +long oc_hugepages = -1; + +/* Restore nr_overcommit_hugepages */ +void cleanup(void) +{ + if (oc_hugepages != -1) + set_nr_overcommit_hugepages(hpage_size, oc_hugepages); +} + +/* Confirm a region really frees, only really important for GHR_FALLBACK */ +void free_and_confirm_region_free(void *p, int line) { + unsigned char vec = 0; + free_hugepage_region(p); + if (mincore(p, 4, &vec) == 0 || vec) + FAIL("free_hugepage_region did not free region at line %d", line); +} + +#define TESTLEN ((num_hugepages - 1) * hpage_size + hpage_size / 2) + +void test_GHR_STRICT(int num_hugepages) +{ + int err; + void *p = get_hugepage_region(TESTLEN, GHR_DEFAULT); + if (p == NULL) + FAIL("get_hugepage_region() for %d hugepages", num_hugepages); + + memset(p, 1, TESTLEN); + + err = test_addr_huge(p + (num_hugepages - 1) * hpage_size); + if (err != 1) + FAIL("Returned page is not hugepage"); + + free_and_confirm_region_free(p, __LINE__); + err = test_addr_huge(p); + if (err == 1) + FAIL("hugepage was not correctly freed"); +} + +void test_GHR_FALLBACK(void) +{ + int err; + long 
rsvd_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD); + long num_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_TOTAL) + - rsvd_hugepages; + + /* We must disable overcommitted huge pages to test this */ + oc_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_OC); + set_nr_overcommit_hugepages(hpage_size, 0); + + /* We should be able to allocate the whole pool */ + void *p = get_hugepage_region(TESTLEN, GHR_DEFAULT); + if (p == NULL) + FAIL("test_GHR_FALLBACK(GHR_DEFAULT) failed for %ld hugepages", + num_hugepages); + memset(p, 1, TESTLEN); + err = test_addr_huge(p + (num_hugepages - 1) * hpage_size); + if (err != 1) + FAIL("Returned page is not hugepage"); + free_and_confirm_region_free(p, __LINE__); + + /* We should fail allocating too much */ + num_hugepages++; + p = get_hugepage_region(TESTLEN, GHR_STRICT); + if (p != NULL) + FAIL("test_GHR_FALLBACK() for %ld expected fail, got success", num_hugepages); + + /* GHR_FALLBACK should succeed by allocating base pages */ + p = get_hugepage_region(TESTLEN, GHR_FALLBACK); + if (p == NULL) + FAIL("test_GHR_FALLBACK(GHR_FALLBACK) failed for %ld hugepages", + num_hugepages); + memset(p, 1, TESTLEN); + err = test_addr_huge(p + (num_hugepages - 1) * hpage_size); + if (err == 1) + FAIL("Returned page is not a base page"); + + /* + * We allocate a second fallback region to see can they be told apart + * on free. 
Merging VMAs would cause problems + */ + void *pb = get_hugepage_region(TESTLEN, GHR_FALLBACK); + if (pb == NULL) + FAIL("test_GHR_FALLBACK(GHR_FALLBACK) x2 failed for %ld hugepages", + num_hugepages); + memset(pb, 1, TESTLEN); + + free_and_confirm_region_free(pb, __LINE__); + free_and_confirm_region_free(p, __LINE__); +} + +int main(int argc, char *argv[]) +{ + test_init(argc, argv); + hpage_size = gethugepagesize(); + check_free_huge_pages(4); + test_GHR_STRICT(1); + test_GHR_STRICT(4); + test_GHR_FALLBACK(); + + PASS(); +} diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 9064451..f163b11 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -341,6 +341,9 @@ check_linkhuge_tests # Test direct allocation API run_test get_huge_pages +# Test hugepage-backed region API + run_test get_hugepage_region + # Test overriding of shmget() run_test shmoverride_linked run_test LD_PRELOAD=libhugetlbfs.so shmoverride_unlinked diff --git a/version.lds b/version.lds index 86cc6b7..e76b8f7 100644 --- a/version.lds +++ b/version.lds @@ -18,6 +18,8 @@ HTLBFS_2.0 { HTLBFS_2.1 { global: + get_hugepage_region; + free_hugepage_region; gethugepagesizes; getpagesizes; hugetlbfs_find_path_for_size; -- 1.5.6.5 ------------------------------------------------------------------------- This SF.Net email is sponsored by the Moblin Your Move Developer's challenge Build the coolest Linux based applications with Moblin SDK & win great prizes Grand prize is a trip for two to an Open Source event anywhere in the world http://moblin-contest.org/redirect.php?banner_id=100&url=/ _______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel