On Fri, 22 May 2009, Avantika Mathur wrote:

Comments are inline.

> Please review this initial patch creating the --add-temp-swap flag to 
> temporarily add swap on disk
> during pool resize.
>
> Once this patch is finalized, I will also add on another patch for 
> creating permanent swap.  
>
> I have used system() to run mkswap, but was not sure if there is a better way 
> to do this.
>
> Currently the swap file's size is 20 hugepages -- in my testing so far, less 
> than 1/10 of the file
> is used.  I will continue to test to see what the ideal temporary swap size 
> should be.  Or if this
> could be changed by user input?
>
> I appreciate feedback
> Thanks!
>
> Avantika
> ---
>
>
>

> When growing the hugepage pool with --pool-pages-min, swap space is generally 
> needed to allocate free memory for the resize. 
> 
> For systems that do not have enough or any swap configured, the user can
> specify the --add-temp-swap flag.  When this flag is specified, temporary swap
> space is created and activated on disk for the duration of a positive resize 
> of
> the minimum pool size.  After the resize is completed, the swap is turned off
> and swapfile removed.
> 
> Signed-off-by: Avantika Mathur <mat...@us.ibm.com>
> ---
> Index: libhugetlbfs-tempswap-new/hugeadm.c
> ===================================================================
> --- libhugetlbfs-tempswap-new.orig/hugeadm.c  2009-05-22 09:43:10.000000000 
> -0700
> +++ libhugetlbfs-tempswap-new/hugeadm.c       2009-05-22 14:50:24.000000000 
> -0700
> @@ -36,6 +36,7 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <sys/mount.h>
> +#include <sys/swap.h>
> 
>  #define _GNU_SOURCE /* for getopt_long */
>  #include <unistd.h>
> @@ -85,6 +86,8 @@
>       CONT("Adjust pool 'size' lower bound");
>       OPTION("--pool-pages-max <size>:[+|-]<count>", "");
>       CONT("Adjust pool 'size' upper bound");
> +     OPTION("--add-temp-swap", "Specified with --pool-pages-min to create");
> +     CONT("temporary swap space for the duration of the pool resize");
>       OPTION("--enable-zone-movable", "Use ZONE_MOVABLE for huge pages");
>       OPTION("--disable-zone-movable", "Do not use ZONE_MOVABLE for huge 
> pages");
>       OPTION("--create-mounts", "Creates a mount point for each available");
> @@ -119,6 +122,7 @@
>  int opt_dry_run = 0;
>  int opt_hard = 0;
>  int opt_movable = -1;
> +int opt_temp_swap = 0;
>  int verbose_level = VERBOSITY_DEFAULT;
> 
>  void setup_environment(char *var, char *val)
> @@ -199,6 +203,7 @@
>  #define LONG_MOVABLE_DISABLE (LONG_MOVABLE|'d')
> 
>  #define LONG_HARD            ('h' << 8)
> +#define LONG_ADD_TEMP_SWAP   ('s' << 8)
> 
>  #define LONG_PAGE    ('P' << 8)
>  #define LONG_PAGE_SIZES      (LONG_PAGE|'s')
> @@ -576,14 +581,83 @@
>       swap_total = read_meminfo(SWAP_TOTAL);
>       if (swap_total <= 0) {
>               WARNING("There is no swap space configured, resizing hugepage 
> pool may fail\n");
> +             WARNING("Use --add-temp-swap option to temporarily add swap 
> during the resize\n");
>               return;
>       }
> 
>       swap_sz = read_meminfo(SWAP_FREE);
>       /* meminfo keeps values in kb, but we use bytes for hpage sizes */
>       swap_sz *= 1024;
> -     if (swap_sz <= gethugepagesize())
> +     if (swap_sz <= gethugepagesize()) {
>               WARNING("There is very little swap space free, resizing 
> hugepage pool may fail\n");
> +             WARNING("Use --add-temp-swap option to temporarily add swap 
> during the resize\n");
> +     }
> +}
> +
> +void add_temp_swap()
> +{
> +     char path[PATH_MAX];
> +     char file[PATH_MAX];
> +     char mkswap_cmd[PATH_MAX];
> +     FILE *f, *devzero;
> +     char buf[1024];
> +     long swap_size;
> +     int i=0;
> +     if (geteuid() != 0) {
> +             ERROR("Swap can only be manipulated by root\n");
> +             exit(EXIT_FAILURE);
> +     }
> +
> +     snprintf(path, PATH_MAX, "%s/swap/temp", MOUNT_DIR);
> +     snprintf(file, PATH_MAX, "%s/swapfile", path);
> +
> +     /* swapsize is 20 hugepages (in KB) */
> +     swap_size = (gethugepagesize() / 1024) * 20;
> +
> +     if (ensure_dir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH, 
> 0, 0))
> +             exit(EXIT_FAILURE);
> +
> +     devzero = fopen("/dev/zero", "r");
> +     if (!devzero) {
> +             ERROR("Couldn't open %s: %s\n", file, strerror(errno));
> +             exit(EXIT_FAILURE);
> +     }
> +     if( !fread(buf, sizeof(char), 1024, devzero)) {
> +             ERROR("Failed to read /dev/zero\n");
> +             exit(EXIT_FAILURE);
> +     }
> +     fclose(devzero);

All you are doing here is zeroing the buffer right?

you could replace all that with this:

memset(buf, 0, 1024);

> +
> +     f = fopen(file, "w");
> +     if (!f) {
> +             ERROR("Couldn't open %s: %s\n", file, strerror(errno));
> +             exit(EXIT_FAILURE);
> +     }
> +     while (i < swap_size) {
> +             if (!fwrite(buf, sizeof(char), 1024, f)) {
> +                     ERROR("Failed to read %s\n", file);
> +                     exit(EXIT_FAILURE);
> +             }
> +             i++;
> +     }
> +     fclose(f);

Have you tried this on ppc64?  As a user, can you notice the increase in time 
taken
to resize a pool when this option is used?  If so, it might be easier to 
allocate a
single buffer of zeros on the heap than try and write to the file 1KB at a time.


> +
> +     snprintf(mkswap_cmd, PATH_MAX, "mkswap %s", file);
> +     system(mkswap_cmd);
> +
> +     INFO("swapon %s\n", file);
> +     if (swapon(file, 0))
> +             ERROR("swapon on %s failed: %s\n", file, strerror(errno));
> +}
> +
> +void rem_temp_swap() {
> +     char file[PATH_MAX];
> +     snprintf(file, PATH_MAX, "%s/swap/temp/swapfile", MOUNT_DIR);
> +
> +     if (swapoff(file))
> +             ERROR("swapoff on %s failed: %s\n", file, strerror(errno));
> +     remove(file);
> +     INFO("swapoff %s\n", file);
>  }
> 
>  enum {
> @@ -694,8 +768,11 @@
>       else
>               cnt = -1;
> 
> -     if (min > min_orig)
> +     if (min > min_orig) {
> +             if (opt_temp_swap)
> +                     add_temp_swap();
>               check_swap();
> +     }
> 
>       INFO("setting HUGEPAGES_TOTAL to %ld\n", min);
>       set_huge_page_counter(page_size, HUGEPAGES_TOTAL, min);
> @@ -718,6 +795,9 @@
>               get_pool_size(page_size, &pools[pos]);
>       }
> 
> +     if ((min > min_orig) && opt_temp_swap)
> +             rem_temp_swap();
> +

I think this first check should be dropped, if temp swap space is added it 
should
be removed, regardless of what happened while it was enabled.

Everything else looks good to me.

>       /*
>        * HUGEPAGES_TOTAL is not guarenteed to check to exactly the figure
>        * requested should there be insufficient pages.  Check the new
> @@ -790,6 +870,7 @@
>               {"enable-zone-movable", no_argument, NULL, LONG_MOVABLE_ENABLE},
>               {"disable-zone-movable", no_argument, NULL, 
> LONG_MOVABLE_DISABLE},
>               {"hard", no_argument, NULL, LONG_HARD},
> +             {"add-temp-swap", no_argument, NULL, LONG_ADD_TEMP_SWAP},
>               {"create-mounts", no_argument, NULL, LONG_CREATE_MOUNTS},
>               {"create-user-mounts", required_argument, NULL, 
> LONG_CREATE_USER_MOUNTS},
>               {"create-group-mounts", required_argument, NULL, 
> LONG_CREATE_GROUP_MOUNTS},
> @@ -845,6 +926,10 @@
>                       opt_hard = 1;
>                       continue;
> 
> +             case LONG_ADD_TEMP_SWAP:
> +                     opt_temp_swap = 1;
> +                     break;
> +
>               case LONG_LIST_ALL_MOUNTS:
>                       opt_list_mounts = 1;
>                       break;
> Index: libhugetlbfs-tempswap-new/man/hugeadm.8
> ===================================================================
> --- libhugetlbfs-tempswap-new.orig/man/hugeadm.8      2009-05-22 
> 14:31:19.000000000 -0700
> +++ libhugetlbfs-tempswap-new/man/hugeadm.8   2009-05-22 14:55:47.000000000 
> -0700
> @@ -148,6 +148,13 @@
>  to resize the pool up to 5 times and continues to try if progress is being
>  made towards the resize.
> 
> +.TP
> +.B --add-temp-swap
> +
> +This options is specified with --pool-pages-min to initialize a temporary
> +swap file for the duration of the pool resize.  Swap is only created for a
> +positive resize, and is then removed once the resize operation is completed.
> +
>  .PP
>  The following options affect the verbosity of libhugetlbfs.
> 

> ------------------------------------------------------------------------------
> Register Now for Creativity and Technology (CaT), June 3rd, NYC. CaT
> is a gathering of tech-side developers & brand creativity professionals. Meet
> the minds behind Google Creative Lab, Visual Complexity, Processing, & 
> iPhoneDevCamp asthey present alongside digital heavyweights like Barbarian
> Group, R/GA, & Big Spaceship. http://www.creativitycat.com 
> _______________________________________________
> Libhugetlbfs-devel mailing list
> Libhugetlbfs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel


-- 
Eric B Munson
IBM Linux Technology Center
ebmun...@us.ibm.com

Attachment: signature.asc
Description: Digital signature

------------------------------------------------------------------------------
Register Now for Creativity and Technology (CaT), June 3rd, NYC. CaT
is a gathering of tech-side developers & brand creativity professionals. Meet
the minds behind Google Creative Lab, Visual Complexity, Processing, & 
iPhoneDevCamp asthey present alongside digital heavyweights like Barbarian
Group, R/GA, & Big Spaceship. http://www.creativitycat.com 
_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to