Repository: trafficserver Updated Branches: refs/heads/master 1a0832b3b -> bba557870
TS-3122: Add support for hugepages on Linux Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787 Branch: refs/heads/master Commit: bba557870c05222d302a05ec948871cdde8bf63b Parents: 1a0832b Author: Phil Sorber <[email protected]> Authored: Thu Oct 16 19:58:08 2014 -0600 Committer: Phil Sorber <[email protected]> Committed: Mon Jun 8 09:28:56 2015 -0600 ---------------------------------------------------------------------- .../configuration/records.config.en.rst | 12 ++ iocore/cache/Cache.cc | 10 +- iocore/cache/CacheDir.cc | 53 +++++-- iocore/cache/P_CacheDir.h | 4 +- lib/ts/Makefile.am | 2 + lib/ts/hugepages.cc | 144 +++++++++++++++++++ lib/ts/hugepages.h | 32 +++++ lib/ts/ink_queue.cc | 21 ++- lib/ts/libts.h | 1 + mgmt/RecordsConfig.cc | 2 + proxy/Main.cc | 7 + 11 files changed, 267 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst ---------------------------------------------------------------------- diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst index 694d338..ccced34 100644 --- a/doc/reference/configuration/records.config.en.rst +++ b/doc/reference/configuration/records.config.en.rst @@ -2849,6 +2849,18 @@ Sockets Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be holding at any one time. +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0 + + Enable (1) the use of huge pages on supported platforms. (Currently only Linux) + + You must also enable hugepages at the OS level. 
In a modern Linux kernel + this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a + sufficiently large value. It is reasonable to use (system + memory/hugepage size) because these pages are only created on demand. + + For more information on the implications of enabling huge pages, see + `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>`_. + .. ts:cv:: CONFIG proxy.config.http.enabled INT 1 Turn on or off support for HTTP proxying. This is rarely used, the one http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc ---------------------------------------------------------------------- diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index becf713..370c516 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -38,6 +38,8 @@ #include "P_CacheBC.h" #endif +#include "hugepages.h" + // Compilation Options #define USELESS_REENABLES // allow them for now // #define VERIFY_JTEST_DATA @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear) Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len, (double)vol_dirlen(this) / (double)this->len * 100.0); - raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this)); + + raw_dir = NULL; + if (ats_hugepage_enabled()) + raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this)); + if (raw_dir == NULL) + raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this)); + dir = (Dir *)(raw_dir + vol_headerlen(this)); header = (VolHeaderFooter *)raw_dir; footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter))); http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc ---------------------------------------------------------------------- diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc index 3a7b9c4..e0f20d3 100644 --- 
a/iocore/cache/CacheDir.cc +++ b/iocore/cache/CacheDir.cc @@ -24,6 +24,8 @@ #include "P_Cache.h" +#include "hugepages.h" + // #define LOOP_CHECK_MODE 1 #ifdef LOOP_CHECK_MODE #define DIR_LOOP_THRESHOLD 1000 @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void) Debug("cache_dir_sync", "sync started"); char *buf = NULL; size_t buflen = 0; + bool buf_huge = false; EThread *t = (EThread *)0xdeadbeef; for (int i = 0; i < gnvol; i++) { @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void) #endif if (buflen < dirlen) { - if (buf) - ats_memalign_free(buf); - buf = (char *)ats_memalign(ats_pagesize(), dirlen); + if (buf) { + if (buf_huge) + ats_free_hugepage(buf, buflen); + else + ats_memalign_free(buf); + } buflen = dirlen; + if (ats_hugepage_enabled()) { + buf = (char *)ats_alloc_hugepage(buflen); + buf_huge = true; + } + if (buf == NULL) { + buf = (char *)ats_memalign(ats_pagesize(), buflen); + buf_huge = false; + } } if (!d->dir_sync_in_progress) { @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void) Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get()); } Debug("cache_dir_sync", "sync done"); - if (buf) - ats_memalign_free(buf); + if (buf) { + if (buf_huge) + ats_free_hugepage(buf, buflen); + else + ats_memalign_free(buf); + buflen = 0; + buf = NULL; + buf_huge = false; + } } @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e) Lrestart: if (vol_idx >= gnvol) { vol_idx = 0; - if (buf) { - ats_memalign_free(buf); - buf = 0; - buflen = 0; - } Debug("cache_dir_sync", "sync done"); if (event == EVENT_INTERVAL) trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency)); @@ -1196,10 +1212,21 @@ Lrestart: Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get()); vol->header->dirty = 0; if (buflen < dirlen) { - if (buf) - ats_memalign_free(buf); - buf = (char *)ats_memalign(ats_pagesize(), dirlen); + if (buf) { + if (buf_huge) + 
ats_free_hugepage(buf, buflen); + else + ats_memalign_free(buf); + } buflen = dirlen; + if (ats_hugepage_enabled()) { + buf = (char *)ats_alloc_hugepage(buflen); + buf_huge = true; + } + if (buf == NULL) { + buf = (char *)ats_memalign(ats_pagesize(), buflen); + buf_huge = false; + } } vol->header->sync_serial++; vol->footer->sync_serial = vol->header->sync_serial; http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h ---------------------------------------------------------------------- diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h index 268ecfb..881d6be 100644 --- a/iocore/cache/P_CacheDir.h +++ b/iocore/cache/P_CacheDir.h @@ -295,6 +295,7 @@ struct CacheSync : public Continuation { int vol_idx; char *buf; size_t buflen; + bool buf_huge; off_t writepos; AIOCallbackInternal io; Event *trigger; @@ -302,7 +303,8 @@ struct CacheSync : public Continuation { int mainEvent(int event, Event *e); void aio_write(int fd, char *b, int n, off_t o); - CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0) + CacheSync() + : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0) { SET_HANDLER(&CacheSync::mainEvent); } http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am ---------------------------------------------------------------------- diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am index 7e04222..18c18cf 100644 --- a/lib/ts/Makefile.am +++ b/lib/ts/Makefile.am @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \ defalloc.h \ fastlz.c \ fastlz.h \ + hugepages.cc \ + hugepages.h \ ink_aiocb.h \ ink_align.h \ ink_apidefs.h \ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc ---------------------------------------------------------------------- diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc new file mode 100644 index 
0000000..216bf0b --- /dev/null +++ b/lib/ts/hugepages.cc @@ -0,0 +1,144 @@ +/** @file + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include <cstdio> +#include <sys/mman.h> +#include "Diags.h" +#include "ink_align.h" + +#define DEBUG_TAG "hugepages" +#define MEMINFO_PATH "/proc/meminfo" +#define LINE_SIZE 256 +#define TOKEN "Hugepagesize:" +#define TOKEN_SIZE (strlen(TOKEN)) + +static int hugepage_size = -1; +static bool hugepage_enabled; + +size_t +ats_hugepage_size(void) +{ +#ifdef MAP_HUGETLB + return hugepage_size; +#else + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); + return 0; +#endif +} + +bool +ats_hugepage_enabled(void) +{ +#ifdef MAP_HUGETLB + return hugepage_enabled; +#else + return false; +#endif +} + +void +ats_hugepage_init(int enabled) +{ +#ifdef MAP_HUGETLB + FILE *fp; + char line[LINE_SIZE]; + char *p, *ep; + + hugepage_size = 0; + + if (!enabled) { + Debug(DEBUG_TAG, "hugepages not enabled"); + return; + } + + fp = fopen(MEMINFO_PATH, "r"); + + if (fp == NULL) { + Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH); + return; + } + + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) { + p = line + TOKEN_SIZE; + while (*p == ' ') { + p++; + } + 
hugepage_size = strtol(p, &ep, 10); + // What other values can this be? + if (strncmp(ep, " kB", 4)) { + hugepage_size *= 1024; + } + break; + } + } + + fclose(fp); + + if (hugepage_size) { + hugepage_enabled = true; + } + + Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size); +#else + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); +#endif +} + +void * +ats_alloc_hugepage(size_t s) +{ +#ifdef MAP_HUGETLB + size_t size; + void *mem; + + size = INK_ALIGN(s, ats_hugepage_size()); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + + if (mem == MAP_FAILED) { + Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size); + return NULL; + } + + return mem; +#else + (void)s; + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); + return NULL; +#endif +} + +bool +ats_free_hugepage(void *ptr, size_t s) +{ +#ifdef MAP_HUGETLB + size_t size; + + size = INK_ALIGN(s, ats_hugepage_size()); + return (munmap(ptr, size) == 0); +#else + (void)ptr; + (void)s; + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); + return false; +#endif +} http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h ---------------------------------------------------------------------- diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h new file mode 100644 index 0000000..812542b --- /dev/null +++ b/lib/ts/hugepages.h @@ -0,0 +1,32 @@ +/** @file + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +#ifndef _hugepages_h_ +#define _hugepages_h_ + +#include <cstring> + +size_t ats_hugepage_size(void); +bool ats_hugepage_enabled(void); +void ats_hugepage_init(int); +void *ats_alloc_hugepage(size_t); +bool ats_free_hugepage(void *, size_t); + +#endif http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc ---------------------------------------------------------------------- diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc index e718b3f..0f14b68 100644 --- a/lib/ts/ink_queue.cc +++ b/lib/ts/ink_queue.cc @@ -50,6 +50,7 @@ #include "ink_assert.h" #include "ink_queue_ext.h" #include "ink_align.h" +#include "hugepages.h" inkcoreapi volatile int64_t fastalloc_mem_in_use = 0; inkcoreapi volatile int64_t fastalloc_mem_total = 0; @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32 /* quick test for power of 2 */ ink_assert(!(alignment & (alignment - 1))); f->alignment = alignment; - f->chunk_size = chunk_size; // Make sure we align *all* the objects in the allocation, not just the first one f->type_size = INK_ALIGN(type_size, alignment); + if (ats_hugepage_enabled()) { + f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size; + } else { + f->chunk_size = chunk_size; + } SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0); f->used = 0; @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f) #ifdef DEBUG char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL; #endif - if (f->alignment) - newp = ats_memalign(f->alignment, f->chunk_size * type_size); - else - 
newp = ats_malloc(f->chunk_size * type_size); + if (ats_hugepage_enabled()) + newp = ats_alloc_hugepage(f->chunk_size * type_size); + + if (newp == NULL) { + if (f->alignment) + newp = ats_memalign(f->alignment, f->chunk_size * type_size); + else + newp = ats_malloc(f->chunk_size * type_size); + } ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice); - fl_memadd(f->chunk_size * type_size); #ifdef DEBUG newsbrk = (char *)sbrk(0); http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h ---------------------------------------------------------------------- diff --git a/lib/ts/libts.h b/lib/ts/libts.h index f136d74..a99e67f 100644 --- a/lib/ts/libts.h +++ b/lib/ts/libts.h @@ -41,6 +41,7 @@ #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ *** */ +#include "hugepages.h" #include "ink_config.h" #include "ink_platform.h" #include "ink_align.h" http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc ---------------------------------------------------------------------- diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc index ebcb8fd..ec3387a 100644 --- a/mgmt/RecordsConfig.cc +++ b/mgmt/RecordsConfig.cc @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL} , + {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL} + , //############ //# http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc ---------------------------------------------------------------------- diff --git a/proxy/Main.cc b/proxy/Main.cc index 202da33..4684945 100644 --- a/proxy/Main.cc +++ b/proxy/Main.cc @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv) // Restart syslog now that we have configuration info syslog_log_configure(); + // init huge pages + int enabled; + 
REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages"); + ats_hugepage_init(enabled); + Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize()); + Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size()); + if (!num_accept_threads) REC_ReadConfigInteger(num_accept_threads, "proxy.config.accept_threads");
