hugeadm --explain will detect if the value of min_free_kbytes is too small to avoid excessive fragmentation in the system. This patch provides an option to automatically set min_free_kbytes to a recommended value. In this version, the recommended value corresponds to keeping one pageblock free per zone in the system. This may turn out to be too conservative, particularly where there are large variances between zone sizes, but it is a reasonable starting point.
Signed-off-by: Mel Gorman <m...@csn.ul.ie> --- hugeadm.c | 21 +++++++++++++++++++++ man/hugeadm.8 | 10 ++++++++++ 2 files changed, 31 insertions(+), 0 deletions(-) diff --git a/hugeadm.c b/hugeadm.c index 6db0bfc..e027504 100644 --- a/hugeadm.c +++ b/hugeadm.c @@ -90,6 +90,9 @@ void print_usage() CONT("Adjust pool 'size' lower bound"); OPTION("--pool-pages-max <size>:[+|-]<count>", ""); CONT("Adjust pool 'size' upper bound"); + OPTION("--set-recommended-min_free_kbytes", ""); + CONT("Sets min_free_kbytes to a recommended value to improve availability of"); + CONT("huge pages at runtime"); OPTION("--add-temp-swap[=count]", "Specified with --pool-pages-min to create"); CONT("temporary swap space for the duration of the pool resize. Default swap"); CONT("size is 5 huge pages. Optional arg sets size to 'count' huge pages"); @@ -131,6 +134,7 @@ void print_usage() int opt_dry_run = 0; int opt_hard = 0; int opt_movable = -1; +int opt_set_recommended_minfreekbytes = 0; int opt_temp_swap = 0; int opt_ramdisk_swap = 0; int opt_swap_persist = 0; @@ -210,6 +214,8 @@ void verbose_expose(void) #define LONG_POOL_MIN_ADJ (LONG_POOL|'m') #define LONG_POOL_MAX_ADJ (LONG_POOL|'M') +#define LONG_SET_RECOMMENDED_MINFREEKBYTES ('k' << 8) + #define LONG_MOVABLE ('z' << 8) #define LONG_MOVABLE_ENABLE (LONG_MOVABLE|'e') #define LONG_MOVABLE_DISABLE (LONG_MOVABLE|'d') @@ -634,6 +640,13 @@ long recommended_minfreekbytes(void) return recommended_min; } +void set_recommended_minfreekbytes(void) +{ + long recommended_min = recommended_minfreekbytes(); + DEBUG("Setting min_free_kbytes to %ld\n", recommended_min); + file_write_ulong(PROCMINFREEKBYTES, (unsigned long)recommended_min); +} + /* * check_minfreekbytes does not alter the value of min_free_kbytes. 
It just * reports what the current value is and what it should be @@ -1013,6 +1026,7 @@ int main(int argc, char** argv) {"pool-list", no_argument, NULL, LONG_POOL_LIST}, {"pool-pages-min", required_argument, NULL, LONG_POOL_MIN_ADJ}, {"pool-pages-max", required_argument, NULL, LONG_POOL_MAX_ADJ}, + {"set-recommended-min_free_kbytes", no_argument, NULL, LONG_SET_RECOMMENDED_MINFREEKBYTES}, {"enable-zone-movable", no_argument, NULL, LONG_MOVABLE_ENABLE}, {"disable-zone-movable", no_argument, NULL, LONG_MOVABLE_DISABLE}, {"hard", no_argument, NULL, LONG_HARD}, @@ -1126,6 +1140,10 @@ int main(int argc, char** argv) opt_movable = 1; break; + case LONG_SET_RECOMMENDED_MINFREEKBYTES: + opt_set_recommended_minfreekbytes = 1; + break; + case LONG_MOVABLE_DISABLE: opt_movable = 0; break; @@ -1178,6 +1196,9 @@ int main(int argc, char** argv) if (opt_movable != -1) setup_zone_movable(opt_movable); + if (opt_set_recommended_minfreekbytes) + set_recommended_minfreekbytes(); + while (--minadj_count >= 0) { if (! kernel_has_overcommit()) pool_adjust(opt_min_adj[minadj_count], POOL_BOTH); diff --git a/man/hugeadm.8 b/man/hugeadm.8 index 0b46f9c..6342980 100644 --- a/man/hugeadm.8 +++ b/man/hugeadm.8 @@ -77,6 +77,16 @@ by applications or stored on the kernels free list. The "Maximum" value is the largest number of hugepages that can be in use at any given time. .TP +.B --set-recommended-min_free_kbytes + +Fragmentation avoidance in the kernel depends on avoiding pages of different +mobility types being mixed within a pageblock arena - typically the size of +the default huge page size. The more mixing that occurs, the less likely +the huge page pool will be able to dynamically resize. The easiest means of +avoiding mixing is to increase /proc/sys/vm/min_free_kbytes. This parameter +sets min_free_kbytes to a recommended value to aid fragmentation avoidance. 
+ +.TP .B --page-sizes This displays every page size supported by the system and has a pool -- 1.6.3.3 ------------------------------------------------------------------------------ _______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel