On Fri, 2008-11-07 at 16:09 +0000, Mel Gorman wrote:
> The get_huge_pages() API is a close-to-kernel interface for the direct
> allocation of hugepages. This forces the caller to deal with alignment and
> fallback to base pages where suitable. For the casual user of hugepages
> that does not care for such things, this patch adds get_hugepage_region().
> It allocates regions of memory that are backed by hugepages where possible
> but callers are not required to align their length and can request fallback
> to base pages.
> 
> Signed-off-by: Mel Gorman <[EMAIL PROTECTED]>

Acked-by: Adam Litke <[EMAIL PROTECTED]>

> ---
>  Makefile                    |    5 +-
>  alloc.c                     |   58 +++++++++++++++++++-
>  hugetlbfs.h                 |   23 +++++++-
>  man/get_huge_pages.3        |    2 +
>  man/get_hugepage_region.3   |   79 ++++++++++++++++++++++++++
>  tests/Makefile              |    4 +-
> tests/get_hugepage_region.c |  129 +++++++++++++++++++++++++++++++++++++++++++
>  tests/run_tests.sh          |    3 +
>  version.lds                 |    2 +
>  9 files changed, 300 insertions(+), 5 deletions(-)
>  create mode 100644 man/get_hugepage_region.3
>  create mode 100644 tests/get_hugepage_region.c
> 
> diff --git a/Makefile b/Makefile
> index 4554154..40c8c45 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -8,7 +8,8 @@ BIN_OBJ_DIR=obj
>  INSTALL_BIN = hugectl hugeedit hugeadm pagesize
>  INSTALL_HEADERS = hugetlbfs.h
>  INSTALL_MAN1 = pagesize.1
> -INSTALL_MAN3 = get_huge_pages.3 gethugepagesizes.3 getpagesizes.3
> +INSTALL_MAN3 = get_huge_pages.3 get_hugepage_region.3 \
> +             gethugepagesizes.3 getpagesizes.3
>  INSTALL_MAN7 = libhugetlbfs.7
>  INSTALL_MAN8 = hugectl.8 hugeedit.8 hugeadm.8
>  LDSCRIPT_TYPES = B BDT
> @@ -379,7 +380,9 @@ install-man:
>               gzip -f $(DESTDIR)$(MANDIR3)/$$x; \
>       done
>       rm -f $(DESTDIR)$(MANDIR3)/free_huge_pages.3.gz
> +     rm -f $(DESTDIR)$(MANDIR3)/free_hugepage_region.3.gz
>       ln -s get_huge_pages.3.gz $(DESTDIR)$(MANDIR3)/free_huge_pages.3.gz
> +     ln -s get_hugepage_region.3.gz $(DESTDIR)$(MANDIR3)/free_hugepage_region.3.gz
>       for x in $(INSTALL_MAN7); do \
>               $(INSTALL) -m 444 man/$$x $(DESTDIR)$(MANDIR7); \
>               gzip -f $(DESTDIR)$(MANDIR7)/$$x; \
> diff --git a/alloc.c b/alloc.c
> index 6e026c5..74bb5a4 100644
> --- a/alloc.c
> +++ b/alloc.c
> @@ -35,7 +35,7 @@ static void *fallback_base_pages(size_t len, ghp_t flags)
>  {
>       int fd;
>       void *buf;
> -     DEBUG("get_huge_pages: Falling back to base pages\n");
> +     DEBUG("get_hugepage_region: Falling back to base pages\n");
> 
>       /*
>        * Map /dev/zero instead of MAP_ANONYMOUS avoid VMA mergings. Freeing
> @@ -78,6 +78,10 @@ void *get_huge_pages(size_t len, ghp_t flags)
>       void *buf;
>       int heap_fd;
> 
> +     /* Catch an altogether-too easy typo */
> +     if (flags & GHR_MASK)
> +             ERROR("Improper use of GHR_* in get_huge_pages()\n");
> +
>       /* Create a file descriptor for the new region */
>       heap_fd = hugetlbfs_unlinked_fd();
>       if (heap_fd < 0) {
> @@ -174,3 +178,55 @@ void free_huge_pages(void *ptr)
> 
>       fclose(fd);
>  }
> +
> +/**
> + * get_hugepage_region - Allocate an amount of memory backed by huge pages
> + *
> + * len: Size of the region to allocate
> + * flags: Flags specifying the behaviour of the function
> + *
> + * This function allocates a region of memory backed by huge pages. Care should
> + * be taken when using this function as a drop-in replacement for malloc() as
> + * memory can be wasted if the length is not hugepage-aligned. This function
> + * is more relaxed than get_huge_pages() in that it allows fallback to small
> + * pages when requested.
> + */
> +void *get_hugepage_region(size_t len, ghr_t flags)
> +{
> +     size_t aligned_len, wastage;
> +     void *buf;
> +
> +     /* Catch an altogether-too easy typo */
> +     if (flags & GHP_MASK)
> +             ERROR("Improper use of GHP_* in get_hugepage_region()\n");
> +
> +     /* Align the len parameter to a hugepage boundary and allocate */
> +     aligned_len = ALIGN(len, gethugepagesize());
> +     buf = get_huge_pages(aligned_len, GHP_DEFAULT);
> +     if (buf == NULL && (flags & GHR_FALLBACK)) {
> +             aligned_len = ALIGN(len, getpagesize());
> +             buf = fallback_base_pages(len, flags);
> +     }
> +
> +     /* Calculate wastage */
> +     wastage = aligned_len - len;
> +     if (wastage != 0)
> +             DEBUG("get_hugepage_region: Wasted %zd bytes due to alignment\n",
> +                     wastage);
> +
> +     return buf;
> +}
> +
> +/**
> + * free_hugepage_region - Free a region allocated by get_hugepage_region
> + * ptr - The pointer to the buffer returned by get_hugepage_region
> + *
> + * This function finds a region to free based on the contents of
> + * /proc/pid/maps. The assumption is made that the ptr is the start of
> + * a hugepage region allocated with get_hugepage_region. No checking is made
> + * that the pointer is to a hugepage backed region.
> + */
> +void free_hugepage_region(void *ptr)
> +{
> +     free_huge_pages(ptr);
> +}
> diff --git a/hugetlbfs.h b/hugetlbfs.h
> index 0efa02c..ebb676c 100644
> --- a/hugetlbfs.h
> +++ b/hugetlbfs.h
> @@ -46,9 +46,30 @@ int hugetlbfs_unlinked_fd_for_size(long page_size);
>   */
>  typedef unsigned long ghp_t;
>  #define GHP_DEFAULT  ((ghp_t)0x01UL)
> +#define GHP_MASK     (GHP_DEFAULT)
> 
> -/* Direct alloc functions */
> +/* Direct alloc functions for hugepages */
>  void *get_huge_pages(size_t len, ghp_t flags);
>  void free_huge_pages(void *ptr);
> 
> +/*
> + * Region alloc flags and types
> + *
> + * GHR_DEFAULT  - Use a combination of flags deemed to be a sensible default
> + *             by the current implementation of the library
> + * GHR_FALLBACK - Use the default hugepage size if possible but fallback to
> + *             smaller pages if necessary
> + * GHR_STRICT   - Use hugepages of some size or return NULL
> + */
> +typedef unsigned long ghr_t;
> +#define GHR_STRICT   ((ghr_t)0x10000000U)
> +#define GHR_FALLBACK ((ghr_t)0x20000000U)
> +#define GHR_DEFAULT  GHR_FALLBACK
> +
> +#define GHR_MASK     (GHR_FALLBACK|GHR_STRICT)
> +
> +/* Allocation functions for regions backed by hugepages */
> +void *get_hugepage_region(size_t len, ghr_t flags);
> +void free_hugepage_region(void *ptr);
> +
>  #endif /* _HUGETLBFS_H */
> diff --git a/man/get_huge_pages.3 b/man/get_huge_pages.3
> index f2a33a4..af95a82 100644
> --- a/man/get_huge_pages.3
> +++ b/man/get_huge_pages.3
> @@ -64,6 +64,8 @@ mmap() was due to.
>  ,
>  .I gethugepagesize(3)
>  ,
> +.I get_hugepage_region(3)
> +,
>  .I libhugetlbfs(7)
>  .SH AUTHORS
>  libhugetlbfs was written by various people on the libhugetlbfs-devel
> diff --git a/man/get_hugepage_region.3 b/man/get_hugepage_region.3
> new file mode 100644
> index 0000000..ce0b018
> --- /dev/null
> +++ b/man/get_hugepage_region.3
> @@ -0,0 +1,79 @@
> +.\"                                      Hey, EMACS: -*- nroff -*-
> +.\" First parameter, NAME, should be all caps
> +.\" Second parameter, SECTION, should be 1-8, maybe w/ subsection
> +.\" other parameters are allowed: see man(7), man(1)
> +.TH GET_HUGEPAGE_REGION 3 "November 7, 2008"
> +.\" Please adjust this date whenever revising the manpage.
> +.\"
> +.\" Some roff macros, for reference:
> +.\" .nh        disable hyphenation
> +.\" .hy        enable hyphenation
> +.\" .ad l      left justify
> +.\" .ad b      justify to both left and right margins
> +.\" .nf        disable filling
> +.\" .fi        enable filling
> +.\" .br        insert line break
> +.\" .sp <n>    insert n+1 empty lines
> +.\" for manpage-specific macros, see man(7)
> +.SH NAME
> +get_hugepage_region, free_hugepage_region \- Allocate and free regions of memory that use hugepages where possible
> +.SH SYNOPSIS
> +.B #include <hugetlbfs.h>
> +.br
> +
> +.br
> +.B void *get_hugepage_region(size_t len, ghr_t flags);
> +.br
> +.B void free_hugepage_region(void *ptr);
> +.SH DESCRIPTION
> +
> +\fBget_hugepage_region()\fP allocates a memory region \fBlen\fP bytes in size
> +backed by hugepages. Hugepages may be of benefit to applications that use
> +large amounts of address space and suffer a performance hit due to TLB
> +misses. Wall-clock time or oprofile can be used to determine if there is
> +a performance benefit from using hugepages or not.
> +
> +Unlike \fBget_huge_pages()\fP, \fBlen\fP does not have to be hugepage-aligned
> +although memory may be wasted due to alignment. The caller may also specify
> +that base pages be used in the event there are no hugepages available.
> +
> +The \fBflags\fP argument changes the behaviour of the function. Flags may
> +be or'd together.
> +
> +.TP
> +.B GHR_FALLBACK
> +Use base pages if there is an insufficient number of huge pages.
> +
> +.B GHR_STRICT
> +Use hugepages or return NULL.
> +
> +.B GHR_DEFAULT
> +
> +The library chooses a sensible combination of flags for allocating a region of
> +memory. The current default is:
> +     GHR_FALLBACK
> +
> +.PP
> +
> +\fBfree_hugepage_region()\fP frees a region of memory allocated by
> +\fBget_hugepage_region()\fP. The behaviour of the function if another
> +pointer is used, valid or otherwise, is undefined.
> +
> +.SH RETURN VALUE
> +
> +On success, a pointer is returned to the allocated memory. On
> +error, NULL is returned. errno will be set based on what the failure of
> +mmap() was due to.
> +
> +.SH SEE ALSO
> +.I oprofile(1)
> +,
> +.I gethugepagesize(3)
> +,
> +.I get_huge_pages(3)
> +,
> +.I libhugetlbfs(7)
> +.SH AUTHORS
> +libhugetlbfs was written by various people on the libhugetlbfs-devel
> +mailing list.
> +
> diff --git a/tests/Makefile b/tests/Makefile
> index 009f75f..4313084 100644
> --- a/tests/Makefile
> +++ b/tests/Makefile
> @@ -7,8 +7,8 @@ LIB_TESTS = gethugepagesize test_root find_path unlinked_fd misalign \
>       truncate_reserve_wraparound truncate_sigbus_versus_oom \
>       map_high_truncate_2 truncate_above_4GB direct \
>       misaligned_offset brk_near_huge task-size-overrun stack_grow_into_huge \
> -     counters quota heap-overflow get_huge_pages shmoverride_linked \
> -     gethugepagesizes
> +     counters quota heap-overflow get_huge_pages get_hugepage_region \
> +     shmoverride_linked gethugepagesizes
>  LIB_TESTS_64 = straddle_4GB huge_at_4GB_normal_below \
>       huge_below_4GB_normal_above
>  NOLIB_TESTS = malloc malloc_manysmall dummy heapshrink shmoverride_unlinked
> diff --git a/tests/get_hugepage_region.c b/tests/get_hugepage_region.c
> new file mode 100644
> index 0000000..81428e4
> --- /dev/null
> +++ b/tests/get_hugepage_region.c
> @@ -0,0 +1,129 @@
> +/*
> + * libhugetlbfs - Easy use of Linux hugepages
> + * Copyright (C) 2005-2006 David Gibson & Adam Litke, IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public License
> + * as published by the Free Software Foundation; either version 2.1 of
> + * the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <hugetlbfs.h>
> +
> +#include "hugetests.h"
> +
> +long hpage_size;
> +long oc_hugepages = -1;
> +
> +/* Restore nr_overcommit_hugepages */
> +void cleanup(void)
> +{
> +     if (oc_hugepages != -1)
> +             set_nr_overcommit_hugepages(hpage_size, oc_hugepages);
> +}
> +
> +/* Confirm a region really frees, only really important for GHR_FALLBACK */
> +void free_and_confirm_region_free(void *p, int line) {
> +     unsigned char vec = 0;
> +     free_hugepage_region(p);
> +     if (mincore(p, 4, &vec) == 0 || vec)
> +             FAIL("free_hugepage_region did not free region at line %d", line);
> +}
> +
> +#define TESTLEN ((num_hugepages - 1) * hpage_size + hpage_size / 2)
> +
> +void test_GHR_STRICT(int num_hugepages)
> +{
> +     int err;
> +     void *p = get_hugepage_region(TESTLEN, GHR_DEFAULT);
> +     if (p == NULL)
> +             FAIL("get_hugepage_region() for %d hugepages", num_hugepages);
> +
> +     memset(p, 1, TESTLEN);
> +
> +     err = test_addr_huge(p + (num_hugepages - 1) * hpage_size);
> +     if (err != 1)
> +             FAIL("Returned page is not hugepage");
> +
> +     free_and_confirm_region_free(p, __LINE__);
> +     err = test_addr_huge(p);
> +     if (err == 1)
> +             FAIL("hugepage was not correctly freed");
> +}
> +
> +void test_GHR_FALLBACK(void)
> +{
> +     int err;
> +     long rsvd_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_RSVD);
> +     long num_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_TOTAL)
> +             - rsvd_hugepages;
> +
> +     /* We must disable overcommitted huge pages to test this */
> +     oc_hugepages = get_huge_page_counter(hpage_size, HUGEPAGES_OC);
> +     set_nr_overcommit_hugepages(hpage_size, 0);
> +
> +     /* We should be able to allocate the whole pool */
> +     void *p = get_hugepage_region(TESTLEN, GHR_DEFAULT);
> +     if (p == NULL)
> +             FAIL("test_GHR_FALLBACK(GHR_DEFAULT) failed for %ld hugepages",
> +                     num_hugepages);
> +     memset(p, 1, TESTLEN);
> +     err = test_addr_huge(p + (num_hugepages - 1) * hpage_size);
> +     if (err != 1)
> +             FAIL("Returned page is not hugepage");
> +     free_and_confirm_region_free(p, __LINE__);
> +
> +     /* We should fail allocating too much */
> +     num_hugepages++;
> +     p = get_hugepage_region(TESTLEN, GHR_STRICT);
> +     if (p != NULL)
> +             FAIL("test_GHR_FALLBACK() for %ld expected fail, got success", num_hugepages);
> +
> +     /* GHR_FALLBACK should succeed by allocating base pages */
> +     p = get_hugepage_region(TESTLEN, GHR_FALLBACK);
> +     if (p == NULL)
> +             FAIL("test_GHR_FALLBACK(GHR_FALLBACK) failed for %ld hugepages",
> +                     num_hugepages);
> +     memset(p, 1, TESTLEN);
> +     err = test_addr_huge(p + (num_hugepages - 1) * hpage_size);
> +     if (err == 1)
> +             FAIL("Returned page is not a base page");
> +
> +     /*
> +      * We allocate a second fallback region to see whether they can be told
> +      * apart on free. Merging VMAs would cause problems
> +      */
> +     void *pb = get_hugepage_region(TESTLEN, GHR_FALLBACK);
> +     if (pb == NULL)
> +             FAIL("test_GHR_FALLBACK(GHR_FALLBACK) x2 failed for %ld hugepages",
> +                     num_hugepages);
> +     memset(pb, 1, TESTLEN);
> +
> +     free_and_confirm_region_free(pb, __LINE__);
> +     free_and_confirm_region_free(p, __LINE__);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +     test_init(argc, argv);
> +     hpage_size = gethugepagesize();
> +     check_free_huge_pages(4);
> +     test_GHR_STRICT(1);
> +     test_GHR_STRICT(4);
> +     test_GHR_FALLBACK();
> +
> +     PASS();
> +}
> diff --git a/tests/run_tests.sh b/tests/run_tests.sh
> index 9064451..f163b11 100755
> --- a/tests/run_tests.sh
> +++ b/tests/run_tests.sh
> @@ -341,6 +341,9 @@ check_linkhuge_tests
>  # Test direct allocation API
>      run_test get_huge_pages
> 
> +# Test hugepage-backed region API
> +    run_test get_hugepage_region
> +
>  # Test overriding of shmget()
>      run_test shmoverride_linked
>      run_test LD_PRELOAD=libhugetlbfs.so shmoverride_unlinked
> diff --git a/version.lds b/version.lds
> index 86cc6b7..e76b8f7 100644
> --- a/version.lds
> +++ b/version.lds
> @@ -18,6 +18,8 @@ HTLBFS_2.0 {
> 
>  HTLBFS_2.1 {
>       global:
> +             get_hugepage_region;
> +             free_hugepage_region;
>               gethugepagesizes;
>               getpagesizes;
>               hugetlbfs_find_path_for_size;
-- 
Adam Litke - (agl at us.ibm.com)
IBM Linux Technology Center


-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to