get_huge_pages() is an API for the allocation of hugepage-backed regions of
memory. In the event it returns NULL, the application is expected to recover,
possibly by using mmap() or malloc() to use base pages. This is boiler-plate
code that is an unnecessary burden on the application developer. This patch
adds GHP_FALLBACK to indicate get_huge_pages() should use smaller pages if
hugepages of the default size are not available.

Changelog since V3
o Map /dev/zero instead of using MAP_ANONYMOUS. free_huge_pages() depends on
  /proc/pid/maps to find the length of mappings. With GHP_FALLBACK, adjacent
  anonymous regions can merge into one VMA, which confuses free_huge_pages().

Changelog since V2
o Remove reference to "heap" in the warning

Changelog since V1
o Do not specify HUGETLB_VERBOSE when running the test
o Account for rsvd pages when running the test
o Change definition of GHP_FALLBACK to mean "use smaller pages"

Signed-off-by: Mel Gorman <[EMAIL PROTECTED]>
--- 
 alloc.c                |   42 ++++++++++++++++++++++++++++++++++
 hugetlbfs.h            |    3 ++
 tests/get_huge_pages.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/alloc.c b/alloc.c
index 65bd063..596c3c1 100644
--- a/alloc.c
+++ b/alloc.c
@@ -18,6 +18,7 @@
  */
 
 #define _GNU_SOURCE
+#include <fcntl.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -29,6 +30,39 @@
 #include "hugetlbfs.h"
 #include "libhugetlbfs_internal.h"
 
+/*
+ * Map len bytes of base pages (private /dev/zero mapping) when hugepages are
+ * unavailable. flags is currently unused. Returns the mapping or NULL.
+ */
+static void *fallback_base_pages(size_t len, ghp_t flags)
+{
+       int fd;
+       void *buf;
+       DEBUG("get_huge_pages: Falling back to base pages\n");
+
+       /*
+        * Map /dev/zero instead of MAP_ANONYMOUS to avoid VMA merging. Freeing
+        * pages depends on /proc/pid/maps to find lengths of allocations.
+        * This is a bit lazy and if found to be costly due to either the
+        * extra open() or virtual address space usage, we could track active
+        * mappings in a lock-protected list instead.
+        */
+       fd = open("/dev/zero", O_RDWR);
+       if (fd == -1) {
+               ERROR("get_huge_pages: Failed to open /dev/zero for fallback\n");
+               return NULL;
+       }
+
+       buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+       if (buf == MAP_FAILED) {
+               WARNING("Base page fallback failed: %s\n", strerror(errno));
+               buf = NULL;
+       }
+       close(fd);  /* the mapping persists after the descriptor is closed */
+
+       return buf;
+}
+
 /**
  * get_huge_pages - Allocate an amount of memory backed by huge pages
  * len: Size of the region to allocate
@@ -58,7 +92,13 @@ void *get_huge_pages(size_t len, ghp_t flags)
                 MAP_PRIVATE, heap_fd, len);
        if (buf == MAP_FAILED) {
                close(heap_fd);
-               WARNING("New heap segment map failed: %s\n", strerror(errno));
+
+               /* Try falling back to base pages if allowed */
+               if (flags & GHP_FALLBACK)
+                       return fallback_base_pages(len, flags);
+
+               WARNING("get_huge_pages: New region mapping failed (flags: 0x%lX): %s\n",
+                       flags, strerror(errno));
                return NULL;
        }
 
diff --git a/hugetlbfs.h b/hugetlbfs.h
index 3945836..91d021f 100644
--- a/hugetlbfs.h
+++ b/hugetlbfs.h
@@ -38,8 +38,11 @@ long dump_proc_pid_maps(void);
  *
  * GHP_DEFAULT - Use a combination of flags deemed to be a sensible default
  *             by the current implementation of the library
+ * GHP_FALLBACK - Use the default hugepage size if possible but fall back to
+ *             smaller pages if necessary
  */
 typedef unsigned long ghp_t;
+#define GHP_FALLBACK   (0x01UL)
 #define GHP_DEFAULT    (0)
 
 /* Direct alloc functions */
diff --git a/tests/get_huge_pages.c b/tests/get_huge_pages.c
index b05e849..05e3ed1 100644
--- a/tests/get_huge_pages.c
+++ b/tests/get_huge_pages.c
@@ -25,6 +25,14 @@
 
 #include "hugetests.h"
 
+/* Free p and confirm it is really gone; mainly matters for GHP_FALLBACK */
+void free_and_confirm_region_free(void *p, int line) {
+       unsigned char vec = 0;  /* one-page result vector for mincore() */
+       free_huge_pages(p);
+       if (mincore(p, 4, &vec) == 0 || vec)  /* success => p is still mapped */
+               FAIL("free_huge_pages did not free region at line %d", line);
+}
+
 void test_get_huge_pages(int num_hugepages)
 {
        int err;
@@ -39,18 +47,67 @@ void test_get_huge_pages(int num_hugepages)
        if (err != 1)
                FAIL("Returned page is not hugepage");
 
-       free_huge_pages(p);
+       free_and_confirm_region_free(p, __LINE__);
        err = test_addr_huge(p);
        if (err == 1)
                FAIL("hugepage was not correctly freed");
 }
 
+/* GHP_FALLBACK must hand back base pages once the hugepage pool is exhausted */
+void test_GHP_FALLBACK(void)
+{
+       int err;
+       long hpage_size = check_hugepagesize();
+       long rsvd_hugepages = read_meminfo("HugePages_Rsvd:");
+       long num_hugepages = read_meminfo("HugePages_Total:") - rsvd_hugepages;
+
+       /* We should be able to allocate the whole pool */
+       void *p = get_huge_pages(num_hugepages * hpage_size, GHP_DEFAULT);
+       if (p == NULL)
+               FAIL("test_GHP_FALLBACK(GHP_DEFAULT) failed for %ld hugepages",
+                       num_hugepages);
+       memset(p, 1, hpage_size);
+       err = test_addr_huge((char *)p + (num_hugepages - 1) * hpage_size);
+       if (err != 1)
+               FAIL("Returned page is not hugepage");
+       free_and_confirm_region_free(p, __LINE__);
+
+       /* We should fail allocating too much */
+       num_hugepages++;
+       p = get_huge_pages(num_hugepages * hpage_size, GHP_DEFAULT);
+       if (p != NULL)
+               FAIL("test_GHP_FALLBACK() for %ld expected fail, got success",
+                       num_hugepages);
+
+       /* GHP_FALLBACK should succeed by allocating base pages */
+       p = get_huge_pages(num_hugepages * hpage_size, GHP_FALLBACK);
+       if (p == NULL)
+               FAIL("test_GHP_FALLBACK(GHP_FALLBACK) failed for %ld hugepages",
+                       num_hugepages);
+       memset(p, 1, hpage_size);
+       err = test_addr_huge((char *)p + (num_hugepages - 1) * hpage_size);
+       if (err == 1)
+               FAIL("Returned page is not a base page");
+
+       /* Allocate a second fallback region to see whether the two can be
+        * told apart on free. Merging VMAs would cause problems */
+       void *pb = get_huge_pages(num_hugepages * hpage_size, GHP_FALLBACK);
+       if (pb == NULL)
+               FAIL("test_GHP_FALLBACK(GHP_FALLBACK) x2 failed for %ld hugepages",
+                       num_hugepages);
+       memset(pb, 1, hpage_size);
+
+       free_and_confirm_region_free(pb, __LINE__);
+       free_and_confirm_region_free(p, __LINE__);
+}
+
 int main(int argc, char *argv[])
 {
        test_init(argc, argv);
        check_free_huge_pages(4);
        test_get_huge_pages(1);
        test_get_huge_pages(4);
+       test_GHP_FALLBACK();
 
        PASS();
 }

-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to