On Tue, May 25, 2010 at 02:39:23PM +0100, Eric B Munson wrote:
> This patch adds the --obey-numa-mempol which requests that the
> machine allocate hugepages based on the NUMA memory policy if
> it is available.
> 
> Signed-off-by: Eric B Munson <ebmun...@us.ibm.com>

It looks fine implementation wise. I would change the name of the switch
to simply --obey-mempolicy though for two reasons. First, numa in the
switch title is redundant, mempolicy is always about NUMA. Second, I
would use "mempolicy" instead of mempol because the system calls and
documentation typically call it memory policy or mempolicy so it'll feel
familiar.

Minor comments below but they can be ignored if you like.

> ---
>  hugeadm.c                |   39 ++++++++++++++++++++++++++++++++++-----
>  hugeutils.c              |    4 ++++
>  libhugetlbfs_privutils.h |   13 +++++++------
>  man/hugeadm.8            |    7 +++++++
>  4 files changed, 52 insertions(+), 11 deletions(-)
> 
> diff --git a/hugeadm.c b/hugeadm.c
> index ebc5bfe..8bb9d7a 100644
> --- a/hugeadm.c
> +++ b/hugeadm.c
> @@ -91,6 +91,8 @@ void print_usage()
>       CONT("specified count on failure");
>       OPTION("--pool-pages-min 
> <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
>       CONT("Adjust pool 'size' lower bound");
> +     OPTION("--obey-numa-mempol", "Obey the NUMA memory policy when");
> +     CONT("adjusting the pool 'size' lower bound");
>       OPTION("--pool-pages-max 
> <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
>       CONT("Adjust pool 'size' upper bound");
>       OPTION("--set-recommended-min_free_kbytes", "");
> @@ -150,6 +152,7 @@ int opt_set_hugetlb_shm_group = 0;
>  int opt_temp_swap = 0;
>  int opt_ramdisk_swap = 0;
>  int opt_swap_persist = 0;
> +int opt_obey_mempol = 0;
>  unsigned long opt_limit_mount_size = 0;
>  int opt_limit_mount_inodes = 0;
>  int verbose_level = VERBOSITY_DEFAULT;
> @@ -231,6 +234,7 @@ void verbose_expose(void)
>  #define LONG_POOL_LIST               (LONG_POOL|'l')
>  #define LONG_POOL_MIN_ADJ    (LONG_POOL|'m')
>  #define LONG_POOL_MAX_ADJ    (LONG_POOL|'M')
> +#define LONG_POOL_MEMPOL     (LONG_POOL|'p')
>  
>  #define LONG_SET_RECOMMENDED_MINFREEKBYTES   ('k' << 8)
>  #define LONG_SET_RECOMMENDED_SHMMAX          ('x' << 8)
> @@ -1176,8 +1180,23 @@ void pool_adjust(char *cmd, unsigned int counter)
>                       add_ramdisk_swap(page_size);
>               check_swap();
>       }
> -     INFO("setting HUGEPAGES_TOTAL to %ld\n", min);
> -     set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
> +
> +     if (opt_obey_mempol) {
> +             if (get_huge_page_counter(page_size,
> +                                     HUGEPAGES_TOTAL_MEMPOL) < 0) {
> +                     opt_obey_mempol = 0;
> +                     WARNING("Counter for NUMA huge page allocations is not 
> found, continuing with normal pool adjustment\n");
> +             }
> +     }
> +
> +     if (opt_obey_mempol) {
> +             INFO("setting HUGEPAGES_TOTAL_MEMPOL to %ld\n", min);
> +             set_huge_page_counter(page_size, HUGEPAGES_TOTAL_MEMPOL, min);
> +     }
> +     else {
> +             INFO("setting HUGEPAGES_TOTAL to %ld\n", min);
> +             set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
> +     }
>       get_pool_size(page_size, &pools[pos]);
>  
>       /* If we fail to make an allocation, retry if user requests */
> @@ -1191,9 +1210,15 @@ void pool_adjust(char *cmd, unsigned int counter)
>               sleep(6);
>  
>               last_pool_value = pools[pos].minimum;
> -             INFO("Retrying allocation HUGEPAGES_TOTAL to %ld current %ld\n",
> -                                                     min, 
> pools[pos].minimum);
> -             set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
> +             if (opt_obey_mempol) {
> +                     INFO("Retrying allocation HUGEPAGES_TOTAL_MEMPOL to %ld 
> current %ld\n", min, pools[pos].minimum);
> +                     set_huge_page_counter(page_size, HUGEPAGES_TOTAL_MEMPOL,
> +                                             min);
> +             }
> +             else {
> +                     INFO("Retrying allocation HUGEPAGES_TOTAL to %ld 
> current %ld\n", min, pools[pos].minimum);
> +                     set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
> +             }

Could also have been done without if/else with

INFO("Retrying allocation HUGEPAGES_TOTAL%s to %ld current %ld\n",
                opt_obey_mempol ? "_MEMPOL" : "",
                min, pools[pos].minimum);
set_huge_page_counter(page_size,
                opt_obey_mempol ? HUGEPAGES_TOTAL_MEMPOL | HUGEPAGES_TOTAL,
                min);

The only advantage is a minor help to maintenance. If
set_huge_page_counter changes its signature, there still is only one
code path to test here.

>               get_pool_size(page_size, &pools[pos]);
>       }
>  
> @@ -1379,6 +1404,10 @@ int main(int argc, char** argv)
>                       }
>                       break;
>  
> +             case LONG_POOL_MEMPOL:
> +                     opt_obey_mempol = 1;
> +                     break;
> +
>               case LONG_POOL_MAX_ADJ:
>                       if (! kernel_has_overcommit()) {
>                               ERROR("kernel does not support overcommit, "
> diff --git a/hugeutils.c b/hugeutils.c
> index 1e35597..3a9501f 100644
> --- a/hugeutils.c
> +++ b/hugeutils.c
> @@ -146,6 +146,10 @@ static struct hugetlb_pool_counter_info_t 
> hugetlb_counter_info[] = {
>               .meminfo_key    = "HugePages_Total:",
>               .sysfs_file     = "nr_hugepages",
>       },
> +     [HUGEPAGES_TOTAL_MEMPOL] = {
> +             .meminfo_key    = "HugePages_Total:",
> +             .sysfs_file     = "nr_hugepages_mempolicy",
> +     },
>       [HUGEPAGES_FREE] = {
>               .meminfo_key    = "HugePages_Free:",
>               .sysfs_file     = "free_hugepages",
> diff --git a/libhugetlbfs_privutils.h b/libhugetlbfs_privutils.h
> index 18bcedb..aaa638f 100644
> --- a/libhugetlbfs_privutils.h
> +++ b/libhugetlbfs_privutils.h
> @@ -34,12 +34,13 @@
>  
>  /* Hugetlb pool counter operations */
>  /* Keys for reading hugetlb pool counters */
> -enum {                        /* The number of pages of a given size that 
> ... */
> -     HUGEPAGES_TOTAL, /*  are allocated to the pool */
> -     HUGEPAGES_FREE,  /*  are not in use */
> -     HUGEPAGES_RSVD,  /*  are reserved for possible future use */
> -     HUGEPAGES_SURP,  /*  are allocated to the pool on demand */
> -     HUGEPAGES_OC,    /*  can be allocated on demand - maximum */
> +enum {                       /* The number of pages of a given size that ... 
> */
> +     HUGEPAGES_TOTAL,        /* are allocated to the pool */
> +     HUGEPAGES_TOTAL_MEMPOL, /* are allocated following the NUMA mempolicy */
> +     HUGEPAGES_FREE,         /* are not in use */
> +     HUGEPAGES_RSVD,         /* are reserved for possible future use */
> +     HUGEPAGES_SURP,         /* are allocated to the pool on demand */
> +     HUGEPAGES_OC,           /* can be allocated on demand - maximum */
>       HUGEPAGES_MAX_COUNTERS,
>  };
>  #define get_huge_page_counter __pu_get_huge_page_counter
> diff --git a/man/hugeadm.8 b/man/hugeadm.8
> index 05cdceb..13d6199 100644
> --- a/man/hugeadm.8
> +++ b/man/hugeadm.8
> @@ -160,6 +160,13 @@ requested for the Minimum pool. The size of the pools 
> should be checked after
>  executing this command to ensure they were successful.
>  
>  .TP
> +.B --obey-numa-mempol
> +
> +This option requests that huge pages are allocated following the set NUMA
> +momory policy for this machine when changing the minimum number of
> +hugepages in the pool.
> +

Not quite accurate as it is about the process, not the machine.
Something like the following maybe?

This option requests that allocation of huge pages to the static pool with
--pool-pages-min obey the NUMA memory policy of the current process. This
policy can be explicitly specified using numactl or inherited from a parent
process.

> +.TP
>  .B --pool-pages-max=<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>
>  
>  This option sets or adjusts the Maximum number of hugepages. Note that while

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

------------------------------------------------------------------------------

_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to