Did we decide against specifying huge pages in Postgres?

---------------------------------------------------------------------------

On Tue, Oct 30, 2012 at 09:16:07PM +0100, Christian Kruse wrote:
> Hey,
> 
> OK, I think I implemented all of the changes you requested, except for
> the ia64-dependent one; I have to do more research on that.
> 
> 
> Greetings,
>  CK

> diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
> index b4fcbaf..66ed10f 100644
> --- a/doc/src/sgml/config.sgml
> +++ b/doc/src/sgml/config.sgml
> @@ -1049,6 +1049,37 @@ include 'filename'
>        </listitem>
>       </varlistentry>
>  
> +     <varlistentry id="guc-huge-tlb-pages" xreflabel="huge_tlb_pages">
> +      <term><varname>huge_tlb_pages</varname> (<type>enum</type>)</term>
> +      <indexterm>
> +       <primary><varname>huge_tlb_pages</> configuration parameter</primary>
> +      </indexterm>
> +      <listitem>
> +       <para>
> +        Enables/disables the use of huge TLB pages. Valid values are
> +        <literal>on</literal>, <literal>off</literal> and <literal>try</literal>.
> +        The default value is <literal>try</literal>.
> +       </para>
> +
> +       <para>
> +        With <varname>huge_tlb_pages</varname> set to <literal>on</literal>
> +        <symbol>mmap()</symbol> will be called with <symbol>MAP_HUGETLB</symbol>.
> +        If the call fails, the server will fail fatally.
> +       </para>
> +
> +       <para>
> +        With <varname>huge_tlb_pages</varname> set to <literal>off</literal> we
> +        will not use <symbol>MAP_HUGETLB</symbol> at all.
> +       </para>
> +
> +       <para>
> +        With <varname>huge_tlb_pages</varname> set to <literal>try</literal>
> +        we will try to use <symbol>MAP_HUGETLB</symbol> and fall back to
> +        <symbol>mmap()</symbol> without <symbol>MAP_HUGETLB</symbol>.
> +       </para>
> +      </listitem>
> +     </varlistentry>
> +
>       <varlistentry id="guc-temp-buffers" xreflabel="temp_buffers">
>        <term><varname>temp_buffers</varname> (<type>integer</type>)</term>
>        <indexterm>
> diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
> index df06312..f9de239 100644
> --- a/src/backend/port/sysv_shmem.c
> +++ b/src/backend/port/sysv_shmem.c
> @@ -27,10 +27,14 @@
>  #ifdef HAVE_SYS_SHM_H
>  #include <sys/shm.h>
>  #endif
> +#ifdef MAP_HUGETLB
> +#include <dirent.h>
> +#endif
>  
>  #include "miscadmin.h"
>  #include "storage/ipc.h"
>  #include "storage/pg_shmem.h"
> +#include "utils/guc.h"
>  
>  
>  typedef key_t IpcMemoryKey;          /* shared memory key passed to 
> shmget(2) */
> @@ -61,6 +65,19 @@ typedef int IpcMemoryId;           /* shared memory ID 
> returned by shmget(2) */
>  #define MAP_FAILED ((void *) -1)
>  #endif
>  
> +#ifdef MAP_HUGETLB
> +#  ifdef __ia64__
> +#    define PG_HUGETLB_BASE_ADDR (void *)(0x8000000000000000UL)
> +#    define PG_MAP_HUGETLB (MAP_HUGETLB|MAP_FIXED)
> +#  else
> +#    define PG_HUGETLB_BASE_ADDR (void *)(0x0UL)
> +#    define PG_MAP_HUGETLB MAP_HUGETLB
> +#  endif
> +#else
> +#  define PG_MAP_HUGETLB 0
> +#endif
> +
> +
>  
>  unsigned long UsedShmemSegID = 0;
>  void    *UsedShmemSegAddr = NULL;
> @@ -73,7 +90,6 @@ static void IpcMemoryDelete(int status, Datum shmId);
>  static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
>                                        IpcMemoryId *shmid);
>  
> -
>  /*
>   *   InternalIpcMemoryCreate(memKey, size)
>   *
> @@ -342,6 +358,155 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long 
> id2)
>  }
>  
>  
> +#ifdef MAP_HUGETLB
> +#define HUGE_PAGE_INFO_DIR  "/sys/kernel/mm/hugepages"
> +
> +/*
> + *   static long InternalGetFreeHugepagesCount(const char *name)
> + *
> + * Attempt to read the number of available hugepages from
> + * /sys/kernel/mm/hugepages/hugepages-<size>/free_hugepages
> + * Returns -1 if the file could not be opened, 0 if no pages are available,
> + * and > 0 if there are free pages.
> + *
> + */
> +static long
> +InternalGetFreeHugepagesCount(const char *name)
> +{
> +     int fd;
> +     char buff[1024];
> +     size_t len;
> +     long result;
> +     char *ptr;
> +
> +     len = snprintf(buff, 1024, "%s/%s/free_hugepages", HUGE_PAGE_INFO_DIR, 
> name);
> +     if (len == 1024) /* I don't think that this will happen ever */
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Filename %s/%s/free_hugepages is too 
> long", HUGE_PAGE_INFO_DIR, name),
> +                              errcontext("while checking hugepage size")));
> +             return -1;
> +     }
> +
> +     fd = open(buff, O_RDONLY);
> +     if (fd <= 0)
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Could not open file %s: %s", buff, 
> strerror(errno)),
> +                              errcontext("while checking hugepage size")));
> +             return -1;
> +     }
> +
> +     len = read(fd, buff, 1024);
> +     if (len <= 0)
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Error reading from file %s: %s", buff, 
> strerror(errno)),
> +                              errcontext("while checking hugepage size")));
> +             close(fd);
> +             return -1;
> +     }
> +
> +     /*
> +      * If the content of free_hugepages is longer than or equal to 1024 bytes
> +      * the rest is irrelevant; we simply want to know if there are any
> +      * hugepages left
> +      */
> +     if (len == 1024)
> +     {
> +             buff[1023] = 0;
> +     }
> +     else
> +     {
> +             buff[len] = 0;
> +     }
> +
> +     close(fd);
> +
> +     result = strtol(buff, &ptr, 10);
> +
> +     if (ptr == NULL)
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Could not convert contents of file 
> %s/%s/free_hugepages to number", HUGE_PAGE_INFO_DIR, name),
> +                              errcontext("while checking hugepage size")));
> +             return -1;
> +     }
> +
> +     return result;
> +}
> +
> +/*
> + *   static long InternalGetHugepageSize()
> + *
> + * Attempt to get a valid hugepage size from /sys/kernel/mm/hugepages/ by
> + * reading directory contents
> + * Will fail (return -1) if the directory could not be opened or no valid
> + * page sizes are available. Will return the smallest hugepage size that
> + * has free pages on success.
> + *
> + */
> +static long
> +InternalGetHugepageSize()
> +{
> +     struct dirent *ent;
> +     DIR *dir = opendir(HUGE_PAGE_INFO_DIR);
> +     long smallest_size = -1, size;
> +     char *ptr;
> +
> +     if (dir == NULL)
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Could not open directory %s: %s", 
> HUGE_PAGE_INFO_DIR, strerror(errno)),
> +                              errcontext("while checking hugepage size")));
> +             return -1;
> +     }
> +
> +     /*
> +      * Linux supports multiple hugepage sizes if the hardware
> +      * supports it; for each possible size there will be a
> +      * directory in /sys/kernel/mm/hugepages consisting of the
> +      * string hugepages- and the size of the page, e.g. on x86_64:
> +      * hugepages-2048kB
> +      */
> +     while((ent = readdir(dir)) != NULL)
> +     {
> +             if (strncmp(ent->d_name, "hugepages-", 10) == 0)
> +             {
> +                     size = strtol(ent->d_name + 10, &ptr, 10);
> +                     if (ptr == NULL)
> +                     {
> +                             continue;
> +                     }
> +
> +                     if (strcmp(ptr, "kB") == 0)
> +                     {
> +                             size *= 1024;
> +                     }
> +
> +                     if ((smallest_size == -1 || size < smallest_size)
> +                             && InternalGetFreeHugepagesCount(ent->d_name) > 
> 0)
> +                     {
> +                             smallest_size = size;
> +                     }
> +             }
> +     }
> +
> +     closedir(dir);
> +
> +     if (smallest_size == -1)
> +     {
> +             ereport(huge_tlb_pages == HUGE_TLB_TRY ? DEBUG1 : WARNING,
> +                             (errmsg("Could not find a valid hugepage size"),
> +                              errhint("This error usually means that either 
> CONFIG_HUGETLB_PAGE "
> +                                              "is not in kernel or that your 
> architecture does not "
> +                                              "support hugepages or you did 
> not configure hugepages")));
> +     }
> +
> +     return smallest_size;
> +}
> +#endif
> +
>  /*
>   * PGSharedMemoryCreate
>   *
> @@ -391,7 +556,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int 
> port)
>        */
>  #ifndef EXEC_BACKEND
>       {
> +#ifdef MAP_HUGETLB
> +             long    pagesize = 0;
> +
> +             if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == 
> HUGE_TLB_TRY)
> +                     pagesize = InternalGetHugepageSize();
> +
> +             if (pagesize <= 0)
> +                     pagesize = sysconf(_SC_PAGE_SIZE);
> +#else
>               long    pagesize = sysconf(_SC_PAGE_SIZE);
> +#endif
>  
>               /*
>                * Ensure request size is a multiple of pagesize.
> @@ -410,8 +585,22 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int 
> port)
>                * to be false, we might need to add a run-time test here and 
> do this
>                * only if the running kernel supports it.
>                */
> -             AnonymousShmem = mmap(NULL, size, PROT_READ|PROT_WRITE, 
> PG_MMAP_FLAGS,
> -                                                       -1, 0);
> +
> +             if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == 
> HUGE_TLB_TRY)
> +             {
> +                     AnonymousShmem = mmap(PG_HUGETLB_BASE_ADDR, size, 
> PROT_READ|PROT_WRITE,
> +                                                               
> PG_MMAP_FLAGS|PG_MAP_HUGETLB, -1, 0);
> +
> +                     elog(DEBUG3, "mmap() tried with MAP_HUGEPAGE: %p", 
> AnonymousShmem);
> +             }
> +
> +             if ((AnonymousShmem == MAP_FAILED && huge_tlb_pages == 
> HUGE_TLB_TRY)
> +                     || huge_tlb_pages == HUGE_TLB_OFF)
> +             {
> +                     AnonymousShmem = mmap(NULL, size, PROT_READ|PROT_WRITE, 
> PG_MMAP_FLAGS,
> +                                                               -1, 0);
> +             }
> +
>               if (AnonymousShmem == MAP_FAILED)
>                       ereport(FATAL,
>                        (errmsg("could not map anonymous shared memory: %m"),
> diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
> index 745e7be..28b6191 100644
> --- a/src/backend/utils/misc/guc.c
> +++ b/src/backend/utils/misc/guc.c
> @@ -22,6 +22,7 @@
>  #include <limits.h>
>  #include <unistd.h>
>  #include <sys/stat.h>
> +#include <sys/mman.h>
>  #ifdef HAVE_SYSLOG
>  #include <syslog.h>
>  #endif
> @@ -389,6 +390,22 @@ static const struct config_enum_entry 
> synchronous_commit_options[] = {
>  };
>  
>  /*
> + * huge_tlb_pages may be on|off|try, where try is the default
> + * on: try to mmap() with MAP_HUGETLB and fail when mmap() fails
> + * off: do not try to mmap() with MAP_HUGETLB
> + * try: try to mmap() with MAP_HUGETLB and fallback to mmap()
> + *      w/o MAP_HUGETLB
> + */
> +static const struct config_enum_entry huge_tlb_options[] = {
> +#ifdef MAP_HUGETLB
> +     {"on", HUGE_TLB_ON, false},
> +     {"try", HUGE_TLB_TRY, false},
> +#endif
> +     {"off", HUGE_TLB_OFF, false},
> +     {NULL, 0, false}
> +};
> +
> +/*
>   * Options for enum values stored in other modules
>   */
>  extern const struct config_enum_entry wal_level_options[];
> @@ -447,6 +464,12 @@ int                      tcp_keepalives_idle;
>  int                  tcp_keepalives_interval;
>  int                  tcp_keepalives_count;
>  
> +#ifdef MAP_HUGETLB
> +int huge_tlb_pages = HUGE_TLB_TRY;
> +#else
> +int huge_tlb_pages = HUGE_TLB_OFF;
> +#endif
> +
>  /*
>   * These variables are all dummies that don't do anything, except in some
>   * cases provide the value for SHOW to display.  The real state is elsewhere
> @@ -3301,6 +3324,26 @@ static struct config_enum ConfigureNamesEnum[] =
>               NULL, NULL, NULL
>       },
>  
> +     {
> +             {"huge_tlb_pages",
> +#ifdef MAP_HUGETLB
> +                     PGC_SUSET,
> +#else
> +                     PGC_INTERNAL,
> +#endif
> +                     RESOURCES_MEM,
> +                     gettext_noop("Enable/disable the use of the hugepages 
> feature"),
> +                     NULL
> +             },
> +             &huge_tlb_pages,
> +#ifdef MAP_HUGETLB
> +             HUGE_TLB_TRY,
> +#else
> +             HUGE_TLB_OFF,
> +#endif
> +             huge_tlb_options,
> +             NULL, NULL, NULL
> +     },
>  
>       /* End-of-list marker */
>       {
> diff --git a/src/backend/utils/misc/postgresql.conf.sample 
> b/src/backend/utils/misc/postgresql.conf.sample
> index eeb9b82..e5bafec 100644
> --- a/src/backend/utils/misc/postgresql.conf.sample
> +++ b/src/backend/utils/misc/postgresql.conf.sample
> @@ -113,6 +113,7 @@
>  
>  #shared_buffers = 32MB                       # min 128kB
>                                       # (change requires restart)
> +#huge_tlb_pages = try                        # try to map memory with 
> MAP_HUGETLB (on, off, try)
>  #temp_buffers = 8MB                  # min 800kB
>  #max_prepared_transactions = 0               # zero disables the feature
>                                       # (change requires restart)
> diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
> index 06f797c..17f5870 100644
> --- a/src/include/utils/guc.h
> +++ b/src/include/utils/guc.h
> @@ -230,6 +230,24 @@ extern int       tcp_keepalives_idle;
>  extern int   tcp_keepalives_interval;
>  extern int   tcp_keepalives_count;
>  
> +
> +/*
> + * Possible values for huge_tlb_pages; default is HUGE_TLB_TRY
> + */
> +typedef enum
> +{
> +     HUGE_TLB_OFF,
> +     HUGE_TLB_ON,
> +     HUGE_TLB_TRY
> +} HugeTlbType;
> +
> +
> +/*
> + * configure the use of huge TLB pages
> + */
> +extern int huge_tlb_pages;
> +
> +
>  /*
>   * Functions exported by guc.c
>   */




-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to