Please review this initial patch, which adds the --add-temp-swap flag to
temporarily add swap on disk during a pool resize.

Once this patch is finalized, I will also add on another patch for creating permanent swap.
I have used system() to run mkswap, but was not sure if there is a better way 
to do this.

Currently the swap file's size is 20 hugepages -- in my testing so far, less
than 1/10 of the file is used.  I will continue to test to see what the ideal
temporary swap size should be.  Alternatively, should the size be configurable
by the user?

I would appreciate any feedback.
Thanks!

Avantika
---



When growing the hugepage pool with --pool-pages-min, swap space is generally
needed so that memory can be freed up for the resize.

For systems that do not have enough or any swap configured, the user can
specify the --add-temp-swap flag.  When this flag is specified, temporary swap
space is created and activated on disk for the duration of a positive resize of
the minimum pool size.  After the resize is completed, the swap is turned off
and the swapfile is removed.

Signed-off-by: Avantika Mathur <mat...@us.ibm.com>
---
Index: libhugetlbfs-tempswap-new/hugeadm.c
===================================================================
--- libhugetlbfs-tempswap-new.orig/hugeadm.c	2009-05-22 09:43:10.000000000 -0700
+++ libhugetlbfs-tempswap-new/hugeadm.c	2009-05-22 14:50:24.000000000 -0700
@@ -36,6 +36,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/mount.h>
+#include <sys/swap.h>
 
 #define _GNU_SOURCE /* for getopt_long */
 #include <unistd.h>
@@ -85,6 +86,8 @@
 	CONT("Adjust pool 'size' lower bound");
 	OPTION("--pool-pages-max <size>:[+|-]<count>", "");
 	CONT("Adjust pool 'size' upper bound");
+	OPTION("--add-temp-swap", "Specified with --pool-pages-min to create");
+	CONT("temporary swap space for the duration of the pool resize");
 	OPTION("--enable-zone-movable", "Use ZONE_MOVABLE for huge pages");
 	OPTION("--disable-zone-movable", "Do not use ZONE_MOVABLE for huge pages");
 	OPTION("--create-mounts", "Creates a mount point for each available");
@@ -119,6 +122,7 @@
 int opt_dry_run = 0;
 int opt_hard = 0;
 int opt_movable = -1;
+int opt_temp_swap = 0;
 int verbose_level = VERBOSITY_DEFAULT;
 
 void setup_environment(char *var, char *val)
@@ -199,6 +203,7 @@
 #define LONG_MOVABLE_DISABLE	(LONG_MOVABLE|'d')
 
 #define LONG_HARD		('h' << 8)
+#define LONG_ADD_TEMP_SWAP	('s' << 8)
 
 #define LONG_PAGE	('P' << 8)
 #define LONG_PAGE_SIZES	(LONG_PAGE|'s')
@@ -576,14 +581,83 @@
 	swap_total = read_meminfo(SWAP_TOTAL);
 	if (swap_total <= 0) {
 		WARNING("There is no swap space configured, resizing hugepage pool may fail\n");
+		WARNING("Use --add-temp-swap option to temporarily add swap during the resize\n");
 		return;
 	}
 
 	swap_sz = read_meminfo(SWAP_FREE);
 	/* meminfo keeps values in kb, but we use bytes for hpage sizes */
 	swap_sz *= 1024;
-	if (swap_sz <= gethugepagesize())
+	if (swap_sz <= gethugepagesize()) {
 		WARNING("There is very little swap space free, resizing hugepage pool may fail\n");
+		WARNING("Use --add-temp-swap option to temporarily add swap during the resize\n");
+	}
+}
+
+void add_temp_swap()
+{
+	char path[PATH_MAX];
+	char file[PATH_MAX];
+	char mkswap_cmd[PATH_MAX];
+	FILE *f, *devzero;
+	char buf[1024];
+	long swap_size;
+	int i=0;
+	if (geteuid() != 0) {
+		ERROR("Swap can only be manipulated by root\n");
+		exit(EXIT_FAILURE);
+	}
+
+	snprintf(path, PATH_MAX, "%s/swap/temp", MOUNT_DIR);
+	snprintf(file, PATH_MAX, "%s/swapfile", path);
+
+	/* swapsize is 20 hugepages (in KB) */
+	swap_size = (gethugepagesize() / 1024) * 20;
+
+	if (ensure_dir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH, 0, 0))
+		exit(EXIT_FAILURE);
+
+	devzero = fopen("/dev/zero", "r");
+	if (!devzero) {
+		ERROR("Couldn't open %s: %s\n", file, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	if( !fread(buf, sizeof(char), 1024, devzero)) {
+		ERROR("Failed to read /dev/zero\n");
+		exit(EXIT_FAILURE);
+	}
+	fclose(devzero);
+
+	f = fopen(file, "w");
+	if (!f) {
+		ERROR("Couldn't open %s: %s\n", file, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	while (i < swap_size) {
+		if (!fwrite(buf, sizeof(char), 1024, f)) {
+			ERROR("Failed to read %s\n", file);
+			exit(EXIT_FAILURE);
+		}
+		i++;
+	}
+	fclose(f);
+
+	snprintf(mkswap_cmd, PATH_MAX, "mkswap %s", file);
+	system(mkswap_cmd);
+
+	INFO("swapon %s\n", file);
+	if (swapon(file, 0))
+		ERROR("swapon on %s failed: %s\n", file, strerror(errno));
+}
+
+void rem_temp_swap() {
+	char file[PATH_MAX];
+	snprintf(file, PATH_MAX, "%s/swap/temp/swapfile", MOUNT_DIR);
+
+	if (swapoff(file))
+		ERROR("swapoff on %s failed: %s\n", file, strerror(errno));
+	remove(file);
+	INFO("swapoff %s\n", file);
 }
 
 enum {
@@ -694,8 +768,11 @@
 	else
 		cnt = -1;
 
-	if (min > min_orig)
+	if (min > min_orig) {
+		if (opt_temp_swap)
+			add_temp_swap();
 		check_swap();
+	}
 
 	INFO("setting HUGEPAGES_TOTAL to %ld\n", min);
 	set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
@@ -718,6 +795,9 @@
 		get_pool_size(page_size, &pools[pos]);
 	}
 
+	if ((min > min_orig) && opt_temp_swap)
+		rem_temp_swap();
+
 	/*
 	 * HUGEPAGES_TOTAL is not guarenteed to check to exactly the figure
 	 * requested should there be insufficient pages.  Check the new
@@ -790,6 +870,7 @@
 		{"enable-zone-movable", no_argument, NULL, LONG_MOVABLE_ENABLE},
 		{"disable-zone-movable", no_argument, NULL, LONG_MOVABLE_DISABLE},
 		{"hard", no_argument, NULL, LONG_HARD},
+		{"add-temp-swap", no_argument, NULL, LONG_ADD_TEMP_SWAP},
 		{"create-mounts", no_argument, NULL, LONG_CREATE_MOUNTS},
 		{"create-user-mounts", required_argument, NULL, LONG_CREATE_USER_MOUNTS},
 		{"create-group-mounts", required_argument, NULL, LONG_CREATE_GROUP_MOUNTS},
@@ -845,6 +926,10 @@
 			opt_hard = 1;
 			continue;
 
+		case LONG_ADD_TEMP_SWAP:
+			opt_temp_swap = 1;
+			break;
+
 		case LONG_LIST_ALL_MOUNTS:
 			opt_list_mounts = 1;
 			break;
Index: libhugetlbfs-tempswap-new/man/hugeadm.8
===================================================================
--- libhugetlbfs-tempswap-new.orig/man/hugeadm.8	2009-05-22 14:31:19.000000000 -0700
+++ libhugetlbfs-tempswap-new/man/hugeadm.8	2009-05-22 14:55:47.000000000 -0700
@@ -148,6 +148,13 @@
 to resize the pool up to 5 times and continues to try if progress is being
 made towards the resize.
 
+.TP
+.B --add-temp-swap
+
+This option is specified with --pool-pages-min to initialize a temporary
+swap file for the duration of the pool resize.  Swap is only created for a
+positive resize, and is then removed once the resize operation is completed.
+
 .PP
 The following options affect the verbosity of libhugetlbfs.
 
------------------------------------------------------------------------------
Register Now for Creativity and Technology (CaT), June 3rd, NYC. CaT
is a gathering of tech-side developers & brand creativity professionals. Meet
the minds behind Google Creative Lab, Visual Complexity, Processing, & 
iPhoneDevCamp as they present alongside digital heavyweights like Barbarian
Group, R/GA, & Big Spaceship. http://www.creativitycat.com 
_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to