This patch adds the --obey-mempolicy option, which requests that the machine allocate hugepages based on the NUMA memory policy if it is available.
Signed-off-by: Eric B Munson <ebmun...@us.ibm.com> --- Changes from V1: * Rename option to obey-mempolicy * Remove if/else's around pool adjustment and replace with inline conditional * Rework man page entry to more accurately describe the new switch --- hugeadm.c | 32 +++++++++++++++++++++++++++----- hugeutils.c | 4 ++++ libhugetlbfs_privutils.h | 13 +++++++------ man/hugeadm.8 | 8 ++++++++ 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/hugeadm.c b/hugeadm.c index ebc5bfe..5800cfe 100644 --- a/hugeadm.c +++ b/hugeadm.c @@ -91,6 +91,8 @@ void print_usage() CONT("specified count on failure"); OPTION("--pool-pages-min <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", ""); CONT("Adjust pool 'size' lower bound"); + OPTION("--obey-mempolicy", "Obey the NUMA memory policy when"); + CONT("adjusting the pool 'size' lower bound"); OPTION("--pool-pages-max <size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", ""); CONT("Adjust pool 'size' upper bound"); OPTION("--set-recommended-min_free_kbytes", ""); @@ -150,6 +152,7 @@ int opt_set_hugetlb_shm_group = 0; int opt_temp_swap = 0; int opt_ramdisk_swap = 0; int opt_swap_persist = 0; +int opt_obey_mempolicy = 0; unsigned long opt_limit_mount_size = 0; int opt_limit_mount_inodes = 0; int verbose_level = VERBOSITY_DEFAULT; @@ -231,6 +234,7 @@ void verbose_expose(void) #define LONG_POOL_LIST (LONG_POOL|'l') #define LONG_POOL_MIN_ADJ (LONG_POOL|'m') #define LONG_POOL_MAX_ADJ (LONG_POOL|'M') +#define LONG_POOL_MEMPOL (LONG_POOL|'p') #define LONG_SET_RECOMMENDED_MINFREEKBYTES ('k' << 8) #define LONG_SET_RECOMMENDED_SHMMAX ('x' << 8) @@ -1176,8 +1180,18 @@ void pool_adjust(char *cmd, unsigned int counter) add_ramdisk_swap(page_size); check_swap(); } - INFO("setting HUGEPAGES_TOTAL to %ld\n", min); - set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min); + + if (opt_obey_mempolicy && get_huge_page_counter(page_size, + HUGEPAGES_TOTAL_MEMPOL) < 0) { + opt_obey_mempolicy = 0; + WARNING("Counter for NUMA huge page allocations is 
not found, continuing with normal pool adjustment\n"); + } + + INFO("setting HUGEPAGES_TOTAL%s to %ld\n", + opt_obey_mempolicy ? "_MEMPOL" : "", min); + set_huge_page_counter(page_size, + opt_obey_mempolicy ? HUGEPAGES_TOTAL_MEMPOL : HUGEPAGES_TOTAL, + min); get_pool_size(page_size, &pools[pos]); /* If we fail to make an allocation, retry if user requests */ @@ -1191,9 +1205,12 @@ void pool_adjust(char *cmd, unsigned int counter) sleep(6); last_pool_value = pools[pos].minimum; - INFO("Retrying allocation HUGEPAGES_TOTAL to %ld current %ld\n", - min, pools[pos].minimum); - set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min); + INFO("Retrying allocation HUGEPAGES_TOTAL%s to %ld current %ld\n", opt_obey_mempolicy ? "_MEMPOL" : "", min, pools[pos].minimum); + set_huge_page_counter(page_size, + opt_obey_mempolicy ? + HUGEPAGES_TOTAL_MEMPOL : + HUGEPAGES_TOTAL, + min); get_pool_size(page_size, &pools[pos]); } @@ -1280,6 +1297,7 @@ int main(int argc, char** argv) {"pool-list", no_argument, NULL, LONG_POOL_LIST}, {"pool-pages-min", required_argument, NULL, LONG_POOL_MIN_ADJ}, {"pool-pages-max", required_argument, NULL, LONG_POOL_MAX_ADJ}, + {"obey-mempolicy", no_argument, NULL, LONG_POOL_MEMPOL}, {"set-recommended-min_free_kbytes", no_argument, NULL, LONG_SET_RECOMMENDED_MINFREEKBYTES}, {"set-recommended-shmmax", no_argument, NULL, LONG_SET_RECOMMENDED_SHMMAX}, {"set-shm-group", required_argument, NULL, LONG_SET_HUGETLB_SHM_GROUP}, @@ -1379,6 +1397,10 @@ int main(int argc, char** argv) } break; + case LONG_POOL_MEMPOL: + opt_obey_mempolicy = 1; + break; + case LONG_POOL_MAX_ADJ: if (! 
kernel_has_overcommit()) { ERROR("kernel does not support overcommit, " diff --git a/hugeutils.c b/hugeutils.c index 1e35597..3a9501f 100644 --- a/hugeutils.c +++ b/hugeutils.c @@ -146,6 +146,10 @@ static struct hugetlb_pool_counter_info_t hugetlb_counter_info[] = { .meminfo_key = "HugePages_Total:", .sysfs_file = "nr_hugepages", }, + [HUGEPAGES_TOTAL_MEMPOL] = { + .meminfo_key = "HugePages_Total:", + .sysfs_file = "nr_hugepages_mempolicy", + }, [HUGEPAGES_FREE] = { .meminfo_key = "HugePages_Free:", .sysfs_file = "free_hugepages", diff --git a/libhugetlbfs_privutils.h b/libhugetlbfs_privutils.h index 18bcedb..aaa638f 100644 --- a/libhugetlbfs_privutils.h +++ b/libhugetlbfs_privutils.h @@ -34,12 +34,13 @@ /* Hugetlb pool counter operations */ /* Keys for reading hugetlb pool counters */ -enum { /* The number of pages of a given size that ... */ - HUGEPAGES_TOTAL, /* are allocated to the pool */ - HUGEPAGES_FREE, /* are not in use */ - HUGEPAGES_RSVD, /* are reserved for possible future use */ - HUGEPAGES_SURP, /* are allocated to the pool on demand */ - HUGEPAGES_OC, /* can be allocated on demand - maximum */ +enum { /* The number of pages of a given size that ... */ + HUGEPAGES_TOTAL, /* are allocated to the pool */ + HUGEPAGES_TOTAL_MEMPOL, /* are allocated following the NUMA mempolicy */ + HUGEPAGES_FREE, /* are not in use */ + HUGEPAGES_RSVD, /* are reserved for possible future use */ + HUGEPAGES_SURP, /* are allocated to the pool on demand */ + HUGEPAGES_OC, /* can be allocated on demand - maximum */ HUGEPAGES_MAX_COUNTERS, }; #define get_huge_page_counter __pu_get_huge_page_counter diff --git a/man/hugeadm.8 b/man/hugeadm.8 index 05cdceb..0c577bc 100644 --- a/man/hugeadm.8 +++ b/man/hugeadm.8 @@ -160,6 +160,14 @@ requested for the Minimum pool. The size of the pools should be checked after executing this command to ensure they were successful. 
.TP +.B --obey-mempolicy + +This option requests that allocation of huge pages to the static pool with +\fB--pool-pages-min\fP obey the NUMA memory policy of the current process. This +policy can be explicitly specified using numactl or inherited from a parent +process. + +.TP .B --pool-pages-max=<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>> This option sets or adjusts the Maximum number of hugepages. Note that while -- 1.7.0.4 ------------------------------------------------------------------------------ _______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel