get_huge_pages() is an API for the allocation of hugepage-backed regions of
memory. In the event it returns NULL, the application is expected to recover,
possibly by using mmap() or malloc() to use base pages. This is boiler-plate
code that is an unnecessary burden on the application developer. This patch
adds GHP_FALLBACK to indicate get_huge_pages() should use smaller pages if
hugepages of the default size are not available.
Changelog since V3
o Map /dev/zero instead of using MAP_ANONYMOUS. free_huge_pages() depends on
/proc/pid/maps to find the length of mappings. With GHP_FALLBACK, anonymous
regions can be merged into one mapping, which confuses free_huge_pages().
Changelog since V2
o Remove reference to "heap" in the warning
Changelog since V1
o Do not specify HUGETLB_VERBOSE when running the test
o Account for rsvd pages when running the test
o Change definition of GHP_FALLBACK to mean "use smaller pages"
Signed-off-by: Mel Gorman <[EMAIL PROTECTED]>
---
alloc.c | 42 ++++++++++++++++++++++++++++++++++
hugetlbfs.h | 3 ++
tests/get_huge_pages.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/alloc.c b/alloc.c
index 65bd063..596c3c1 100644
--- a/alloc.c
+++ b/alloc.c
@@ -18,6 +18,7 @@
*/
#define _GNU_SOURCE
+#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -29,6 +30,39 @@
#include "hugetlbfs.h"
#include "libhugetlbfs_internal.h"
+/*
+ * Allocate base pages if huge page allocation fails. Returns the mapping or
+ * NULL on failure; flags is currently unused.
+ */
+static void *fallback_base_pages(size_t len, ghp_t flags)
+{
+	int fd;
+	void *buf;
+	DEBUG("get_huge_pages: Falling back to base pages\n");
+
+	/*
+	 * Map /dev/zero instead of MAP_ANONYMOUS to avoid VMA merging. Freeing
+	 * pages depends on /proc/pid/maps to find lengths of allocations.
+	 * This is a bit lazy and if found to be costly due to either the
+	 * extra open() or virtual address space usage, we could track active
+	 * mappings in a lock-protected list instead.
+	 */
+	fd = open("/dev/zero", O_RDWR);
+	if (fd == -1) {
+		ERROR("get_huge_pages: Failed to open /dev/zero for fallback: "
+			"%s\n", strerror(errno));
+		return NULL;
+	}
+
+	buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (buf == MAP_FAILED) {
+		WARNING("Base page fallback failed: %s\n", strerror(errno));
+		buf = NULL;
+	}
+	close(fd); /* fd is no longer needed once mmap() has been attempted */
+	return buf;
+}
+
/**
* get_huge_pages - Allocate an amount of memory backed by huge pages
* len: Size of the region to allocate
@@ -58,7 +92,13 @@ void *get_huge_pages(size_t len, ghp_t flags)
MAP_PRIVATE, heap_fd, len);
if (buf == MAP_FAILED) {
close(heap_fd);
- WARNING("New heap segment map failed: %s\n", strerror(errno));
+
+ /* Try falling back to base pages if allowed */
+ if (flags & GHP_FALLBACK)
+ return fallback_base_pages(len, flags);
+
+ WARNING("get_huge_pages: New region mapping failed (flags: 0x%lX): %s\n",
+ flags, strerror(errno));
return NULL;
}
diff --git a/hugetlbfs.h b/hugetlbfs.h
index 3945836..91d021f 100644
--- a/hugetlbfs.h
+++ b/hugetlbfs.h
@@ -38,8 +38,11 @@ long dump_proc_pid_maps(void);
*
* GHP_DEFAULT - Use a combination of flags deemed to be a sensible default
* by the current implementation of the library
+ * GHP_FALLBACK - Use the default hugepage size if possible but fall back to
+ * smaller pages if necessary
*/
typedef unsigned long ghp_t;
+#define GHP_FALLBACK (0x01UL)
#define GHP_DEFAULT (0)
/* Direct alloc functions */
diff --git a/tests/get_huge_pages.c b/tests/get_huge_pages.c
index b05e849..05e3ed1 100644
--- a/tests/get_huge_pages.c
+++ b/tests/get_huge_pages.c
@@ -25,6 +25,14 @@
#include "hugetests.h"
+/* Free p, then FAIL if mincore() still succeeds on it or reports residency */
+void free_and_confirm_region_free(void *p, int line) {
+	unsigned char resident = 0;
+	free_huge_pages(p);
+	if (mincore(p, 4, &resident) == 0 || resident != 0)
+		FAIL("free_huge_pages did not free region at line %d", line);
+}
+
void test_get_huge_pages(int num_hugepages)
{
int err;
@@ -39,18 +47,67 @@ void test_get_huge_pages(int num_hugepages)
if (err != 1)
FAIL("Returned page is not hugepage");
- free_huge_pages(p);
+ free_and_confirm_region_free(p, __LINE__);
err = test_addr_huge(p);
if (err == 1)
FAIL("hugepage was not correctly freed");
}
+/* Check GHP_DEFAULT fails past the pool size while GHP_FALLBACK succeeds */
+void test_GHP_FALLBACK(void)
+{
+	int err;
+	long hpage_size = check_hugepagesize();
+	long rsvd_hugepages = read_meminfo("HugePages_Rsvd:");
+	long num_hugepages = read_meminfo("HugePages_Total:") - rsvd_hugepages;
+
+	/* We should be able to allocate the whole pool */
+	void *p = get_huge_pages(num_hugepages * hpage_size, GHP_DEFAULT);
+	if (p == NULL)
+		FAIL("test_GHP_FALLBACK(GHP_DEFAULT) failed for %ld hugepages",
+			num_hugepages);
+	memset(p, 1, hpage_size);
+	err = test_addr_huge(p + (num_hugepages - 1) * hpage_size);
+	if (err != 1)
+		FAIL("Returned page is not hugepage");
+	free_and_confirm_region_free(p, __LINE__);
+
+	/* We should fail allocating too much */
+	num_hugepages++;
+	p = get_huge_pages(num_hugepages * hpage_size, GHP_DEFAULT);
+	if (p != NULL)
+		FAIL("test_GHP_FALLBACK() for %ld expected fail, got success",
+			num_hugepages);
+
+	/* GHP_FALLBACK should succeed by allocating base pages */
+	p = get_huge_pages(num_hugepages * hpage_size, GHP_FALLBACK);
+	if (p == NULL)
+		FAIL("test_GHP_FALLBACK(GHP_FALLBACK) failed for %ld hugepages",
+			num_hugepages);
+	memset(p, 1, hpage_size);
+	err = test_addr_huge(p + (num_hugepages - 1) * hpage_size);
+	if (err == 1)
+		FAIL("Returned page is not a base page");
+
+	/* Allocate a second fallback region to see whether the two can be
+	 * told apart on free. Merging VMAs would cause problems */
+	void *pb = get_huge_pages(num_hugepages * hpage_size, GHP_FALLBACK);
+	if (pb == NULL)
+		FAIL("test_GHP_FALLBACK(GHP_FALLBACK) x2 failed for %ld hugepages",
+			num_hugepages);
+	memset(pb, 1, hpage_size);
+
+	free_and_confirm_region_free(pb, __LINE__);
+	free_and_confirm_region_free(p, __LINE__);
+}
+
int main(int argc, char *argv[])
{
test_init(argc, argv);
check_free_huge_pages(4);
test_get_huge_pages(1);
test_get_huge_pages(4);
+ test_GHP_FALLBACK();
PASS();
}
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel