Transparent huge pages (THP) expose several tunables in sysfs that control
when THP and defragmentation are used and how khugepaged behaves.  This patch
adds support for controlling these tunables via hugeadm options.

Signed-off-by: Eric B Munson <emun...@mgebm.net>
---
 hugeadm.c     |  162 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 man/hugeadm.8 |   65 +++++++++++++++++++++++
 2 files changed, 227 insertions(+), 0 deletions(-)

diff --git a/hugeadm.c b/hugeadm.c
index 26056e9..65e991a 100644
--- a/hugeadm.c
+++ b/hugeadm.c
@@ -83,6 +83,18 @@ extern char *optarg;
 #define SWAP_FREE "SwapFree:"
 #define SWAP_TOTAL "SwapTotal:"
 
+#define ALWAYS           "always"       /* written to TRANS_ENABLE or TRANS_DEFRAG */
+#define MADVISE                  "madvise"      /* written to TRANS_ENABLE or TRANS_DEFRAG */
+#define NEVER            "never"        /* written to TRANS_ENABLE or TRANS_DEFRAG */
+#define YES              "yes"  /* written to KHUGE_DEFRAG */
+#define NO               "no"   /* written to KHUGE_DEFRAG */
+#define TRANS_ENABLE     "/sys/kernel/mm/transparent_hugepage/enabled"
+#define TRANS_DEFRAG     "/sys/kernel/mm/transparent_hugepage/defrag"
+#define KHUGE_DEFRAG     
"/sys/kernel/mm/transparent_hugepage/khugepaged/defrag"
+#define KHUGE_SCAN_PAGES  
"/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan"
+#define KHUGE_SCAN_SLEEP  
"/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs"
+#define KHUGE_ALLOC_SLEEP 
"/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs"
+
 void print_usage()
 {
        fprintf(stderr, "hugeadm [options]\n");
@@ -97,6 +109,20 @@ void print_usage()
        CONT("Adjust pool 'size' lower bound");
        OPTION("--obey-mempolicy", "Obey the NUMA memory policy when");
        CONT("adjusting the pool 'size' lower bound");
+       OPTION("--trans-always", "Enable transparent huge pages always");
+       OPTION("--trans-madvise", "Enable transparent huge pages with madvise");
+       OPTION("--trans-never", "Disable transparent huge pages");
+       OPTION("--trans-defrag-always", "Don't limit THP defrag");
+       OPTION("--trans-defrag-madvise", "Limit THP defrag to madvise areas 
only");
+       OPTION("--trans-defrag-never", "Disable defrag during THP allocation");
+       OPTION("--khugepaged-defrag-off", "Disable defrag during THP 
promotion");
+       OPTION("--khugepaged-defrag-on", "Enable defrag during THP promotion");
+       OPTION("--khugepaged-pages <pages to scan>", "Number of pages that 
khugepaged");
+       CONT("should scan on each pass");
+       OPTION("--khugepaged-scan-sleep <milliseconds>", "Time in ms to sleep 
between");
+       CONT("khugepaged passes");
+       OPTION("--khugepaged-alloc-sleep <milliseconds>", "Time in ms for 
khugepaged");
+       CONT("to wait if there was a huge page allocation failure");
        OPTION("--pool-pages-max 
<size|DEFAULT>:[+|-]<pagecount|memsize<G|M|K>>", "");
        CONT("Adjust pool 'size' upper bound");
        OPTION("--set-recommended-min_free_kbytes", "");
@@ -271,6 +297,21 @@ void verbose_expose(void)
 
 #define LONG_EXPLAIN   ('e' << 8)
 
+#define LONG_TRANS                     ('t' << 8)   /* prefix byte shared by the --trans-* option codes */
+#define LONG_TRANS_ALWAYS              (LONG_TRANS|'a')
+#define LONG_TRANS_MADVISE             (LONG_TRANS|'m')
+#define LONG_TRANS_NEVER               (LONG_TRANS|'n')
+#define LONG_TRANS_DEFRAG_ALWAYS       (LONG_TRANS|'A')
+#define LONG_TRANS_DEFRAG_MADVISE      (LONG_TRANS|'M')
+#define LONG_TRANS_DEFRAG_NEVER                (LONG_TRANS|'N')
+
+#define LONG_KHUGE                     ('K' << 8)   /* prefix byte shared by the --khugepaged-* option codes */
+#define LONG_KHUGE_DEFRAG_ON           (LONG_KHUGE|'y')
+#define LONG_KHUGE_DEFRAG_OFF          (LONG_KHUGE|'n')
+#define LONG_KHUGE_PAGES               (LONG_KHUGE|'p')
+#define LONG_KHUGE_SCAN                        (LONG_KHUGE|'s')
+#define LONG_KHUGE_ALLOC               (LONG_KHUGE|'a')
+
 #define MAX_POOLS      32
 
 static int cmpsizes(const void *p1, const void *p2)
@@ -1062,6 +1103,30 @@ void rem_ramdisk_swap(){
        }
 }
 
+/* Write 'value' to the THP sysfs tunable 'file'; exits unless run as root. */
+void set_trans_opt(const char *file, const char *value)
+{
+       FILE *f;
+
+       if (geteuid() != 0) {
+               ERROR("Transparent huge page options can only be set by root\n");
+               exit(EXIT_FAILURE);
+       }
+       /* Dry run: show the equivalent shell command (value first, then file). */
+       if (opt_dry_run) {
+               printf("echo %s > %s\n", value, file);
+               return;
+       }
+       /* A tunable that cannot be opened is reported and skipped, not fatal. */
+       f = fopen(file, "w");
+       if (!f) {
+               ERROR("Couldn't open %s: %s\n", file, strerror(errno));
+               return;
+       }
+       fprintf(f, "%s", value);
+       fclose(f);
+}
+
 enum {
        POOL_MIN,
        POOL_MAX,
@@ -1308,7 +1373,13 @@ int main(int argc, char** argv)
        int opt_list_mounts = 0, opt_pool_list = 0, opt_create_mounts = 0;
        int opt_global_mounts = 0, opt_pgsizes = 0, opt_pgsizes_all = 0;
        int opt_explain = 0, minadj_count = 0, maxadj_count = 0;
+       int opt_trans_always = 0, opt_trans_never = 0, opt_trans_madvise = 0;
+       int opt_trans_defrag_always = 0, opt_trans_defrag_never = 0;
+       int opt_trans_defrag_madvise = 0, opt_khuge_defrag_off = 0;
+       int opt_khuge_defrag_on = 0, opt_khuge_pages = 0, opt_khuge_scan = 0;
+       int opt_khuge_alloc = 0;
        int ret = 0, index = 0;
+       char *khuge_pages = NULL, *khuge_alloc = NULL, *khuge_scan = NULL;
        gid_t opt_gid = 0;
        struct group *opt_grp = NULL;
        int group_invalid = 0;
@@ -1321,6 +1392,17 @@ int main(int argc, char** argv)
                {"pool-pages-min", required_argument, NULL, LONG_POOL_MIN_ADJ},
                {"pool-pages-max", required_argument, NULL, LONG_POOL_MAX_ADJ},
                {"obey-mempolicy", no_argument, NULL, LONG_POOL_MEMPOL},
+               {"trans-always", no_argument, NULL, LONG_TRANS_ALWAYS},
+               {"trans-madvise", no_argument, NULL, LONG_TRANS_MADVISE},
+               {"trans-never", no_argument, NULL, LONG_TRANS_NEVER},
+               {"trans-defrag-always", no_argument, NULL, 
LONG_TRANS_DEFRAG_ALWAYS},
+               {"trans-defrag-madvise", no_argument, NULL, 
LONG_TRANS_DEFRAG_MADVISE},
+               {"trans-defrag-never", no_argument, NULL, 
LONG_TRANS_DEFRAG_NEVER},
+               {"khugepaged-defrag-on", no_argument, NULL, 
LONG_KHUGE_DEFRAG_ON},
+               {"khugepaged-defrag-off", no_argument, NULL, 
LONG_KHUGE_DEFRAG_OFF},
+               {"khugepaged-pages", required_argument, NULL, LONG_KHUGE_PAGES},
+               {"khugepaged-scan-sleep", required_argument, NULL, 
LONG_KHUGE_SCAN},
+               {"khugepaged-alloc-sleep", required_argument, NULL, 
LONG_KHUGE_ALLOC},
                {"set-recommended-min_free_kbytes", no_argument, NULL, 
LONG_SET_RECOMMENDED_MINFREEKBYTES},
                {"set-recommended-shmmax", no_argument, NULL, 
LONG_SET_RECOMMENDED_SHMMAX},
                {"set-shm-group", required_argument, NULL, 
LONG_SET_HUGETLB_SHM_GROUP},
@@ -1424,6 +1506,53 @@ int main(int argc, char** argv)
                        opt_obey_mempolicy = 1;
                        break;
 
+               case LONG_TRANS_ALWAYS:
+                       opt_trans_always = 1;
+                       break;
+
+               case LONG_TRANS_MADVISE:
+                       opt_trans_madvise = 1;
+                       break;
+
+               case LONG_TRANS_NEVER:
+                       opt_trans_never = 1;
+                       break;
+
+               case LONG_TRANS_DEFRAG_ALWAYS:
+                       opt_trans_defrag_always = 1;
+                       break;
+
+               case LONG_TRANS_DEFRAG_MADVISE:
+                       opt_trans_defrag_madvise = 1;
+                       break;
+
+               case LONG_TRANS_DEFRAG_NEVER:
+                       opt_trans_defrag_never = 1;
+                       break;
+
+               case LONG_KHUGE_DEFRAG_ON:
+                       opt_khuge_defrag_on = 1;
+                       break;
+
+               case LONG_KHUGE_DEFRAG_OFF:
+                       opt_khuge_defrag_off = 1;
+                       break;
+
+               case LONG_KHUGE_PAGES:
+                       opt_khuge_pages = 1;
+                       khuge_pages = optarg;
+                       break;
+
+               case LONG_KHUGE_SCAN:
+                       opt_khuge_scan = 1;
+                       khuge_scan = optarg;
+                       break;
+
+               case LONG_KHUGE_ALLOC:
+                       opt_khuge_alloc = 1;
+                       khuge_alloc = optarg;
+                       break;
+
                case LONG_POOL_MAX_ADJ:
                        if (! kernel_has_overcommit()) {
                                ERROR("kernel does not support overcommit, "
@@ -1534,6 +1663,39 @@ int main(int argc, char** argv)
        if (opt_movable != -1)
                setup_zone_movable(opt_movable);
 
+       if (opt_trans_always)
+               set_trans_opt(TRANS_ENABLE, ALWAYS);
+
+       if (opt_trans_madvise)
+               set_trans_opt(TRANS_ENABLE, MADVISE);
+
+       if (opt_trans_never)
+               set_trans_opt(TRANS_ENABLE, NEVER);
+
+       if (opt_trans_defrag_always)
+               set_trans_opt(TRANS_DEFRAG, ALWAYS);
+
+       if (opt_trans_defrag_madvise)
+               set_trans_opt(TRANS_DEFRAG, MADVISE);
+
+       if (opt_trans_defrag_never)
+               set_trans_opt(TRANS_DEFRAG, NEVER);
+
+       if (opt_khuge_defrag_on)
+               set_trans_opt(KHUGE_DEFRAG, YES);
+
+       if (opt_khuge_defrag_off)
+               set_trans_opt(KHUGE_DEFRAG, NO);
+
+       if (opt_khuge_pages)
+               set_trans_opt(KHUGE_SCAN_PAGES, khuge_pages);
+
+       if (opt_khuge_alloc)
+               set_trans_opt(KHUGE_ALLOC_SLEEP, khuge_alloc);
+
+       if (opt_khuge_scan)
+               set_trans_opt(KHUGE_SCAN_SLEEP, khuge_scan);
+
        if (opt_set_recommended_minfreekbytes)
                set_recommended_minfreekbytes();
 
diff --git a/man/hugeadm.8 b/man/hugeadm.8
index 0c577bc..7d89d85 100644
--- a/man/hugeadm.8
+++ b/man/hugeadm.8
@@ -238,10 +238,75 @@ make the swap space persist after the resize operation is 
completed.  The swap
 spaces can later be removed manually using the swapoff command.
 
 .PP
+The following options tune the transparent huge page usage
+
+.TP
+.B --trans-always
+
+Enable transparent huge pages always
+
+.TP
+.B --trans-madvise
+
+Enable transparent huge pages only on madvised regions
+
+.TP
+.B --trans-never
+
+Disable transparent huge pages
+
+.TP
+.B --trans-defrag-always
+
+Always use memory defragmentation to free contiguous memory for huge page
+allocations
+
+.TP
+.B --trans-defrag-madvise
+
+Only use memory defragmentation for allocating huge pages in madvised regions
+
+.TP
+.B --trans-defrag-never
+
+Never use memory defragmentation for allocating huge pages; if no contiguous
+memory is available, fall back to base pages
+
+.TP
+.B --khugepaged-defrag-off
+
+The --trans-defrag-* options control the use of defragmentation during huge
+page allocation.  Even with this set to never, khugepaged can still run the
+defragmentation logic when it attempts to promote an area to a huge page.
+Use this option to disable defragmentation during attempted promotions
+
+.TP
+.B --khugepaged-defrag-on
+
+Enable defragmentation during huge page promotion (this is on by default)
+
+.TP
+.B --khugepaged-pages <pages to scan>
+
+Configure the number of pages that khugepaged should scan on each pass
+
+.TP
+.B --khugepaged-scan-sleep <milliseconds>
+
+Configure how many milliseconds khugepaged should wait between passes
+
+.TP
+.B --khugepaged-alloc-sleep <milliseconds>
+
+Configure how many milliseconds khugepaged should wait after failing to
+allocate a huge page to throttle the next attempt.
+
+.PP
 The following options affect the verbosity of libhugetlbfs.
 
 .TP
 .B --verbose <level>, -v
+
 The default value for the verbosity level is 1 and the range of the value can
 be set with --verbose from 0 to 99. The higher the value, the more verbose the
 library will be. 0 is quiet and 3 will output much debugging information. The
-- 
1.7.4.1


------------------------------------------------------------------------------
vRanger cuts backup time in half-while increasing security.
With the market-leading solution for virtual backup and recovery, 
you get blazing-fast, flexible, and affordable data protection.
Download your free trial now. 
http://p.sf.net/sfu/quest-d2dcopy1
_______________________________________________
Libhugetlbfs-devel mailing list
Libhugetlbfs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel

Reply via email to