Please review this initial patch creating the --add-temp-swap flag to temporarily add swap on disk during pool resize.
Once this patch is finalized, I will follow up with another patch for creating permanent swap.
I have used system() to run mkswap, but was not sure if there is a better way to do this. Currently the swap file's size is 20 hugepages -- in my testing so far, less than 1/10 of the file is used. I will continue to test to see what the ideal temporary swap size should be. Alternatively, should the size be configurable by user input? I would appreciate any feedback. Thanks! Avantika ---
When growing the hugepage pool with --pool-pages-min, swap space is generally needed to allocate free memory for the resize. For systems that do not have enough or any swap configured, the user can specify the --add-temp-swap flag. When this flag is specified, temporary swap space is created and activated on disk for the duration of a positive resize of the minimum pool size. After the resize is completed, the swap is turned off and swapfile removed. Signed-off-by: Avantika Mathur <mat...@us.ibm.com> --- Index: libhugetlbfs-tempswap-new/hugeadm.c =================================================================== --- libhugetlbfs-tempswap-new.orig/hugeadm.c 2009-05-22 09:43:10.000000000 -0700 +++ libhugetlbfs-tempswap-new/hugeadm.c 2009-05-22 14:50:24.000000000 -0700 @@ -36,6 +36,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <sys/mount.h> +#include <sys/swap.h> #define _GNU_SOURCE /* for getopt_long */ #include <unistd.h> @@ -85,6 +86,8 @@ CONT("Adjust pool 'size' lower bound"); OPTION("--pool-pages-max <size>:[+|-]<count>", ""); CONT("Adjust pool 'size' upper bound"); + OPTION("--add-temp-swap", "Specified with --pool-pages-min to create"); + CONT("temporary swap space for the duration of the pool resize"); OPTION("--enable-zone-movable", "Use ZONE_MOVABLE for huge pages"); OPTION("--disable-zone-movable", "Do not use ZONE_MOVABLE for huge pages"); OPTION("--create-mounts", "Creates a mount point for each available"); @@ -119,6 +122,7 @@ int opt_dry_run = 0; int opt_hard = 0; int opt_movable = -1; +int opt_temp_swap = 0; int verbose_level = VERBOSITY_DEFAULT; void setup_environment(char *var, char *val) @@ -199,6 +203,7 @@ #define LONG_MOVABLE_DISABLE (LONG_MOVABLE|'d') #define LONG_HARD ('h' << 8) +#define LONG_ADD_TEMP_SWAP ('s' << 8) #define LONG_PAGE ('P' << 8) #define LONG_PAGE_SIZES (LONG_PAGE|'s') @@ -576,14 +581,83 @@ swap_total = read_meminfo(SWAP_TOTAL); if (swap_total <= 0) { WARNING("There is no swap space configured, resizing hugepage 
pool may fail\n"); + WARNING("Use --add-temp-swap option to temporarily add swap during the resize\n"); return; } swap_sz = read_meminfo(SWAP_FREE); /* meminfo keeps values in kb, but we use bytes for hpage sizes */ swap_sz *= 1024; - if (swap_sz <= gethugepagesize()) + if (swap_sz <= gethugepagesize()) { WARNING("There is very little swap space free, resizing hugepage pool may fail\n"); + WARNING("Use --add-temp-swap option to temporarily add swap during the resize\n"); + } +} + +void add_temp_swap() +{ + char path[PATH_MAX]; + char file[PATH_MAX]; + char mkswap_cmd[PATH_MAX]; + FILE *f, *devzero; + char buf[1024]; + long swap_size; + int i=0; + if (geteuid() != 0) { + ERROR("Swap can only be manipulated by root\n"); + exit(EXIT_FAILURE); + } + + snprintf(path, PATH_MAX, "%s/swap/temp", MOUNT_DIR); + snprintf(file, PATH_MAX, "%s/swapfile", path); + + /* swapsize is 20 hugepages (in KB) */ + swap_size = (gethugepagesize() / 1024) * 20; + + if (ensure_dir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH, 0, 0)) + exit(EXIT_FAILURE); + + devzero = fopen("/dev/zero", "r"); + if (!devzero) { + ERROR("Couldn't open %s: %s\n", file, strerror(errno)); + exit(EXIT_FAILURE); + } + if( !fread(buf, sizeof(char), 1024, devzero)) { + ERROR("Failed to read /dev/zero\n"); + exit(EXIT_FAILURE); + } + fclose(devzero); + + f = fopen(file, "w"); + if (!f) { + ERROR("Couldn't open %s: %s\n", file, strerror(errno)); + exit(EXIT_FAILURE); + } + while (i < swap_size) { + if (!fwrite(buf, sizeof(char), 1024, f)) { + ERROR("Failed to read %s\n", file); + exit(EXIT_FAILURE); + } + i++; + } + fclose(f); + + snprintf(mkswap_cmd, PATH_MAX, "mkswap %s", file); + system(mkswap_cmd); + + INFO("swapon %s\n", file); + if (swapon(file, 0)) + ERROR("swapon on %s failed: %s\n", file, strerror(errno)); +} + +void rem_temp_swap() { + char file[PATH_MAX]; + snprintf(file, PATH_MAX, "%s/swap/temp/swapfile", MOUNT_DIR); + + if (swapoff(file)) + ERROR("swapoff on %s failed: %s\n", file, 
strerror(errno)); + remove(file); + INFO("swapoff %s\n", file); } enum { @@ -694,8 +768,11 @@ else cnt = -1; - if (min > min_orig) + if (min > min_orig) { + if (opt_temp_swap) + add_temp_swap(); check_swap(); + } INFO("setting HUGEPAGES_TOTAL to %ld\n", min); set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min); @@ -718,6 +795,9 @@ get_pool_size(page_size, &pools[pos]); } + if ((min > min_orig) && opt_temp_swap) + rem_temp_swap(); + /* * HUGEPAGES_TOTAL is not guarenteed to check to exactly the figure * requested should there be insufficient pages. Check the new @@ -790,6 +870,7 @@ {"enable-zone-movable", no_argument, NULL, LONG_MOVABLE_ENABLE}, {"disable-zone-movable", no_argument, NULL, LONG_MOVABLE_DISABLE}, {"hard", no_argument, NULL, LONG_HARD}, + {"add-temp-swap", no_argument, NULL, LONG_ADD_TEMP_SWAP}, {"create-mounts", no_argument, NULL, LONG_CREATE_MOUNTS}, {"create-user-mounts", required_argument, NULL, LONG_CREATE_USER_MOUNTS}, {"create-group-mounts", required_argument, NULL, LONG_CREATE_GROUP_MOUNTS}, @@ -845,6 +926,10 @@ opt_hard = 1; continue; + case LONG_ADD_TEMP_SWAP: + opt_temp_swap = 1; + break; + case LONG_LIST_ALL_MOUNTS: opt_list_mounts = 1; break; Index: libhugetlbfs-tempswap-new/man/hugeadm.8 =================================================================== --- libhugetlbfs-tempswap-new.orig/man/hugeadm.8 2009-05-22 14:31:19.000000000 -0700 +++ libhugetlbfs-tempswap-new/man/hugeadm.8 2009-05-22 14:55:47.000000000 -0700 @@ -148,6 +148,13 @@ to resize the pool up to 5 times and continues to try if progress is being made towards the resize. +.TP +.B --add-temp-swap + +This options is specified with --pool-pages-min to initialize a temporary +swap file for the duration of the pool resize. Swap is only created for a +positive resize, and is then removed once the resize operation is completed. + .PP The following options affect the verbosity of libhugetlbfs.
------------------------------------------------------------------------------ Register Now for Creativity and Technology (CaT), June 3rd, NYC. CaT is a gathering of tech-side developers & brand creativity professionals. Meet the minds behind Google Creative Lab, Visual Complexity, Processing, & iPhoneDevCamp asthey present alongside digital heavyweights like Barbarian Group, R/GA, & Big Spaceship. http://www.creativitycat.com
_______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel