Use of madvise() on a hugetlbfs-backed memory region can result in reserves being leaked. This may only affect recent kernels (e.g. 2.6.29). The problem is not yet fixed upstream; this patch adds a test case that serves as a reproduction step. Once a fix is merged, I'll update the test with the commit ID.
Signed-off-by: Mel Gorman <m...@csn.ul.ie> --- tests/Makefile | 2 - tests/madvise_reserve.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/run_tests.py | 1 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index 31b1b3b..d3efe79 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -8,7 +8,7 @@ LIB_TESTS = gethugepagesize test_root find_path unlinked_fd misalign \ map_high_truncate_2 truncate_above_4GB direct \ misaligned_offset brk_near_huge task-size-overrun stack_grow_into_huge \ counters quota heap-overflow get_huge_pages get_hugepage_region \ - shmoverride_linked gethugepagesizes + shmoverride_linked gethugepagesizes madvise_reserve LIB_TESTS_64 = straddle_4GB huge_at_4GB_normal_below \ huge_below_4GB_normal_above NOLIB_TESTS = malloc malloc_manysmall dummy heapshrink shmoverride_unlinked diff --git a/tests/madvise_reserve.c b/tests/madvise_reserve.c new file mode 100644 index 0000000..e7d78b1 --- /dev/null +++ b/tests/madvise_reserve.c @@ -0,0 +1,85 @@ +/* + * libhugetlbfs - Easy use of Linux hugepages + * Copyright (C) 2005-2006 IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <setjmp.h> + +#include <hugetlbfs.h> + +#include "hugetests.h" + +/* + * Test rationale: + * + * madvise() on some kernels can cause the reservation counter to get + * corrupted. The problem is that the pages are allocated for the + * reservation but not faulted in at the time of allocation. The + * counters do not get updated and effectively "leak". This test + * identifies whether the kernel is vulnerable to the problem or not. + * At time of writing, there is no fix for recent kernels. + */ + +int main(int argc, char *argv[]) +{ + long hpage_size; + int fd; + void *p; + unsigned long initial_rsvd, map_rsvd, madvise_rsvd, end_rsvd; + + test_init(argc, argv); + + /* Setup */ + hpage_size = check_hugepagesize(); + fd = hugetlbfs_unlinked_fd(); + if (fd < 0) + FAIL("hugetlbfs_unlinked_fd()"); + initial_rsvd = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD); + verbose_printf("Reserve count before map: %lu\n", initial_rsvd); + + /* mmap a region and record reservations */ + p = mmap(NULL, hpage_size, PROT_READ|PROT_WRITE, MAP_SHARED, + fd, 0); + if (p == MAP_FAILED) + FAIL("mmap(): %s", strerror(errno)); + map_rsvd = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD); + verbose_printf("Reserve count after map: %lu\n", map_rsvd); + + /* madvise the region and record reservations */ + if (madvise(p, hpage_size, MADV_WILLNEED) == -1) + FAIL("madvise(): %s", strerror(errno)); + madvise_rsvd = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD); + verbose_printf("Reserve count after madvise: %lu\n", madvise_rsvd); + + /* Free region */ + munmap(p, hpage_size); + close(fd); + end_rsvd = 
get_huge_page_counter(hpage_size, HUGEPAGES_RSVD); + verbose_printf("Reserve count after close(): %lu\n", end_rsvd); + + /* Reserve count should match initial reserve count */ + if (end_rsvd != initial_rsvd) + FAIL("Reserve leaked: %lu != %lu\n", end_rsvd, initial_rsvd); + + PASS(); +} diff --git a/tests/run_tests.py b/tests/run_tests.py index 042d427..6510787 100755 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -479,6 +479,7 @@ def functional_tests(): do_test("brk_near_huge") do_test("task-size-overrun") do_test("stack_grow_into_huge") + do_test("madvise_reserve") # Tests requiring an active mount and hugepage COW do_test("private") ------------------------------------------------------------------------------ The NEW KODAK i700 Series Scanners deliver under ANY circumstances! Your production scanning environment may not be a perfect world - but thanks to Kodak, there's a perfect scanner to get the job done! With the NEW KODAK i700 Series Scanner you'll get full speed at 300 dpi even with all image processing features enabled. http://p.sf.net/sfu/kodak-com _______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel