This patch adds the --obey-mempolicy option, which requests that the
machine allocate hugepages based on the NUMA memory policy if
it is available.

Signed-off-by: Eric B Munson <ebmun...@us.ibm.com>

---
Changes from V1:
* Rename option to obey-mempolicy
* Remove if/else's around pool adjustment and replace with inline conditional
* Rework man page entry to more accurately describe the new switch
---
 hugeadm.c                |   32 +++++++++++++++++++++++++++-----
 hugeutils.c              |    4 ++++
 libhugetlbfs_privutils.h |   13 +++++++------
 man/hugeadm.8            |    8 ++++++++
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/hugeadm.c b/hugeadm.c
index ebc5bfe..5800cfe 100644
--- a/hugeadm.c
+++ b/hugeadm.c
@@ -91,6 +91,8 @@ void print_usage()
        CONT("specified count on failure");
        OPTION("--pool-pages-min 
<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
        CONT("Adjust pool 'size' lower bound");
+       OPTION("--obey-mempolicy", "Obey the NUMA memory policy when");
+       CONT("adjusting the pool 'size' lower bound");
        OPTION("--pool-pages-max 
<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
        CONT("Adjust pool 'size' upper bound");
        OPTION("--set-recommended-min_free_kbytes", "");
@@ -150,6 +152,7 @@ int opt_set_hugetlb_shm_group = 0;
 int opt_temp_swap = 0;
 int opt_ramdisk_swap = 0;
 int opt_swap_persist = 0;
+int opt_obey_mempolicy = 0;
 unsigned long opt_limit_mount_size = 0;
 int opt_limit_mount_inodes = 0;
 int verbose_level = VERBOSITY_DEFAULT;
@@ -231,6 +234,7 @@ void verbose_expose(void)
 #define LONG_POOL_LIST         (LONG_POOL|'l')
 #define LONG_POOL_MIN_ADJ      (LONG_POOL|'m')
 #define LONG_POOL_MAX_ADJ      (LONG_POOL|'M')
+#define LONG_POOL_MEMPOL       (LONG_POOL|'p')
 
 #define LONG_SET_RECOMMENDED_MINFREEKBYTES     ('k' << 8)
 #define LONG_SET_RECOMMENDED_SHMMAX            ('x' << 8)
@@ -1176,8 +1180,18 @@ void pool_adjust(char *cmd, unsigned int counter)
                        add_ramdisk_swap(page_size);
                check_swap();
        }
-       INFO("setting HUGEPAGES_TOTAL to %ld\n", min);
-       set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
+
+       if (opt_obey_mempolicy && get_huge_page_counter(page_size,
+                               HUGEPAGES_TOTAL_MEMPOL) < 0) {
+               opt_obey_mempolicy = 0;
+               WARNING("Counter for NUMA huge page allocations is not found, 
continuing with normal pool adjustment\n");
+       }
+
+       INFO("setting HUGEPAGES_TOTAL%s to %ld\n",
+               opt_obey_mempolicy ? "_MEMPOL" : "", min);
+       set_huge_page_counter(page_size,
+               opt_obey_mempolicy ? HUGEPAGES_TOTAL_MEMPOL : HUGEPAGES_TOTAL,
+               min);
        get_pool_size(page_size, &pools[pos]);
 
        /* If we fail to make an allocation, retry if user requests */
@@ -1191,9 +1205,12 @@ void pool_adjust(char *cmd, unsigned int counter)
                sleep(6);
 
                last_pool_value = pools[pos].minimum;
-               INFO("Retrying allocation HUGEPAGES_TOTAL to %ld current %ld\n",
-                                                       min, 
pools[pos].minimum);
-               set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
+               INFO("Retrying allocation HUGEPAGES_TOTAL%s to %ld current 
%ld\n", opt_obey_mempolicy ? "_MEMPOL" : "", min, pools[pos].minimum);
+               set_huge_page_counter(page_size,
+                       opt_obey_mempolicy ?
+                               HUGEPAGES_TOTAL_MEMPOL :
+                               HUGEPAGES_TOTAL,
+                       min);
                get_pool_size(page_size, &pools[pos]);
        }
 
@@ -1280,6 +1297,7 @@ int main(int argc, char** argv)
                {"pool-list", no_argument, NULL, LONG_POOL_LIST},
                {"pool-pages-min", required_argument, NULL, LONG_POOL_MIN_ADJ},
                {"pool-pages-max", required_argument, NULL, LONG_POOL_MAX_ADJ},
+               {"obey-mempolicy", no_argument, NULL, LONG_POOL_MEMPOL},
                {"set-recommended-min_free_kbytes", no_argument, NULL, 
LONG_SET_RECOMMENDED_MINFREEKBYTES},
                {"set-recommended-shmmax", no_argument, NULL, 
LONG_SET_RECOMMENDED_SHMMAX},
                {"set-shm-group", required_argument, NULL, 
LONG_SET_HUGETLB_SHM_GROUP},
@@ -1379,6 +1397,10 @@ int main(int argc, char** argv)
                        }
                        break;
 
+               case LONG_POOL_MEMPOL:
+                       opt_obey_mempolicy = 1;
+                       break;
+
                case LONG_POOL_MAX_ADJ:
                        if (! kernel_has_overcommit()) {
                                ERROR("kernel does not support overcommit, "
diff --git a/hugeutils.c b/hugeutils.c
index 1e35597..3a9501f 100644
--- a/hugeutils.c
+++ b/hugeutils.c
@@ -146,6 +146,10 @@ static struct hugetlb_pool_counter_info_t 
hugetlb_counter_info[] = {
                .meminfo_key    = "HugePages_Total:",
                .sysfs_file     = "nr_hugepages",
        },
+       [HUGEPAGES_TOTAL_MEMPOL] = {
+               .meminfo_key    = "HugePages_Total:",
+               .sysfs_file     = "nr_hugepages_mempolicy",
+       },
        [HUGEPAGES_FREE] = {
                .meminfo_key    = "HugePages_Free:",
                .sysfs_file     = "free_hugepages",
diff --git a/libhugetlbfs_privutils.h b/libhugetlbfs_privutils.h
index 18bcedb..aaa638f 100644
--- a/libhugetlbfs_privutils.h
+++ b/libhugetlbfs_privutils.h
@@ -34,12 +34,13 @@
 
 /* Hugetlb pool counter operations */
 /* Keys for reading hugetlb pool counters */
-enum {                  /* The number of pages of a given size that ... */
-       HUGEPAGES_TOTAL, /*  are allocated to the pool */
-       HUGEPAGES_FREE,  /*  are not in use */
-       HUGEPAGES_RSVD,  /*  are reserved for possible future use */
-       HUGEPAGES_SURP,  /*  are allocated to the pool on demand */
-       HUGEPAGES_OC,    /*  can be allocated on demand - maximum */
+enum {                 /* The number of pages of a given size that ... */
+       HUGEPAGES_TOTAL,        /* are allocated to the pool */
+       HUGEPAGES_TOTAL_MEMPOL, /* are allocated following the NUMA mempolicy */
+       HUGEPAGES_FREE,         /* are not in use */
+       HUGEPAGES_RSVD,         /* are reserved for possible future use */
+       HUGEPAGES_SURP,         /* are allocated to the pool on demand */
+       HUGEPAGES_OC,           /* can be allocated on demand - maximum */
        HUGEPAGES_MAX_COUNTERS,
 };
 #define get_huge_page_counter __pu_get_huge_page_counter
diff --git a/man/hugeadm.8 b/man/hugeadm.8
index 05cdceb..0c577bc 100644
--- a/man/hugeadm.8
+++ b/man/hugeadm.8
@@ -160,6 +160,14 @@ requested for the Minimum pool. The size of the pools 
should be checked after
 executing this command to ensure they were successful.
 
 .TP
+.B --obey-mempolicy
+
+This option requests that allocation of huge pages to the static pool with
+\fB--pool-pages-min\fP obey the NUMA memory policy of the current process. This
+policy can be explicitly specified using numactl or inherited from a parent
+process.
+
+.TP
 .B --pool-pages-max=<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>
 
 This option sets or adjusts the Maximum number of hugepages. Note that while
-- 
1.7.0.4


------------------------------------------------------------------------------

_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to