Repository: kudu Updated Branches: refs/heads/master eb4d88f0a -> 0061f32d1
Add validation and docs on setting block_cache_capacity_mb too high Raising --block_cache_capacity_mb can improve Kudu's performance, but raising it too high (above the memory pressure threshold, which is --memory_pressure_percentage of the hard memory limit) causes constant flushing even if write throughput is low. This patch adds validation of the capacity using a group flag validator, and does not permit Kudu to start if the block cache size can can cause Kudu to get stuck in aggressive flushing mode. It warns if the capacity is large enough (> 50%) of the pressure threshold. I chose 50% as the max recommended block cache size as a percentage of the memory pressure threshold somewhat arbitrarily. It seems safe, but also leaves room for allocating a large block cache. There's no automated tests but I did verify the expected messages appeared in the logs and the server fails to start when --block_cache_capacity_mb is too high. Change-Id: Ia99fa95c578235beb3a1c72a33a22b4d8ff4800c Reviewed-on: http://gerrit.cloudera.org:8080/10266 Reviewed-by: Alexey Serbin <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/297b72bd Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/297b72bd Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/297b72bd Branch: refs/heads/master Commit: 297b72bd26cdd546f0a73cda7487c80566388492 Parents: eb4d88f Author: Will Berkeley <[email protected]> Authored: Tue May 1 09:45:40 2018 -0700 Committer: Will Berkeley <[email protected]> Committed: Thu May 3 22:49:54 2018 +0000 ---------------------------------------------------------------------- docs/troubleshooting.adoc | 10 +++++ src/kudu/cfile/block_cache.cc | 43 +++++++++++++++++++- .../integration-tests/client-stress-test.cc | 4 ++ .../integration-tests/raft_consensus-itest.cc | 4 ++ src/kudu/util/process_memory.cc | 19 +++++---- src/kudu/util/process_memory.h | 6 +++ 6 files changed, 76 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/docs/troubleshooting.adoc ---------------------------------------------------------------------- diff --git a/docs/troubleshooting.adoc b/docs/troubleshooting.adoc index 51d412c..edb8991 100644 --- a/docs/troubleshooting.adoc +++ b/docs/troubleshooting.adoc @@ -532,6 +532,16 @@ are several ways to relieve the memory pressure on Kudu: Generally, the recommended ratio of maintenance manager threads to data directories is 1:3. - Reduce the volume of writes flowing to Kudu on the application side. +Finally, check the value of `--block_cache_capacity_mb`. This setting determines +the maximum size of Kudu's block cache. While a higher value can help with read +and write performance, setting it too high (as a percentage of `--memory_limit_hard_bytes`) +is harmful. Do not raise `--block_cache_capacity_mb` above `--memory_pressure_percentage` +(default 60%) of `--memory_limit_hard_bytes`, as this will cause Kudu to flush +aggressively even if write throughput is low. Keeping the block cache capacity +below 50% of the memory pressure percentage times the hard limit is recommended. +With the defaults, this means the `--block_cache_capacity_mb` should not exceed +30% of `--memory_limit_hard_bytes`. + [[heap_sampling]] === Heap Sampling http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/src/kudu/cfile/block_cache.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/block_cache.cc b/src/kudu/cfile/block_cache.cc index 12ab3bb..42c4201 100644 --- a/src/kudu/cfile/block_cache.cc +++ b/src/kudu/cfile/block_cache.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include "kudu/cfile/block_cache.h" + #include <cstddef> #include <cstdint> #include <ostream> @@ -23,17 +25,24 @@ #include <gflags/gflags.h> #include <glog/logging.h> -#include "kudu/cfile/block_cache.h" #include "kudu/gutil/gscoped_ptr.h" #include "kudu/gutil/macros.h" +#include "kudu/gutil/strings/substitute.h" #include "kudu/util/cache.h" #include "kudu/util/flag_tags.h" +#include "kudu/util/flag_validators.h" +#include "kudu/util/process_memory.h" #include "kudu/util/slice.h" #include "kudu/util/string_case.h" DEFINE_int64(block_cache_capacity_mb, 512, "block cache capacity in MB"); TAG_FLAG(block_cache_capacity_mb, stable); +DEFINE_bool(force_block_cache_capacity, false, + "Force Kudu to accept the block cache size, even if it is unsafe."); +TAG_FLAG(force_block_cache_capacity, unsafe); +TAG_FLAG(force_block_cache_capacity, hidden); + DEFINE_string(block_cache_type, "DRAM", "Which type of block cache to use for caching data. " "Valid choices are 'DRAM' or 'NVM'. DRAM, the default, " @@ -41,6 +50,8 @@ DEFINE_string(block_cache_type, "DRAM", "in a memory-mapped file using the NVML library."); TAG_FLAG(block_cache_type, experimental); +using strings::Substitute; + template <class T> class scoped_refptr; namespace kudu { @@ -56,8 +67,38 @@ Cache* CreateCache(int64_t capacity) { return NewLRUCache(t, capacity, "block_cache"); } +// Validates the block cache capacity won't permit the cache to grow large enough +// to cause pernicious flushing behavior. See KUDU-2318. +bool ValidateBlockCacheCapacity() { + if (FLAGS_force_block_cache_capacity) { + return true; + } + int64_t capacity = FLAGS_block_cache_capacity_mb * 1024 * 1024; + int64_t mpt = process_memory::MemoryPressureThreshold(); + if (capacity > mpt) { + LOG(ERROR) << Substitute("Block cache capacity exceeds the memory pressure " + "threshold ($0 bytes vs. $1 bytes). This will " + "cause instability and harmful flushing behavior. " + "Lower --block_cache_capacity_mb or raise " + "--memory_limit_hard_bytes.", + capacity, mpt); + return false; + } + if (capacity > mpt / 2) { + LOG(WARNING) << Substitute("Block cache capacity exceeds 50% of the memory " + "pressure threshold ($0 bytes vs. 50% of $1 bytes). " + "This may cause performance problems. Consider " + "lowering --block_cache_capacity_mb or raising " + "--memory_limit_hard_bytes.", + capacity, mpt); + } + return true; +} + } // anonymous namespace +GROUP_FLAG_VALIDATOR(block_cache_capacity_mb, ValidateBlockCacheCapacity); + CacheType BlockCache::GetConfiguredCacheTypeOrDie() { ToUpperCase(FLAGS_block_cache_type, &FLAGS_block_cache_type); if (FLAGS_block_cache_type == "NVM") { http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/src/kudu/integration-tests/client-stress-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/client-stress-test.cc b/src/kudu/integration-tests/client-stress-test.cc index 49d9f32..102de8b 100644 --- a/src/kudu/integration-tests/client-stress-test.cc +++ b/src/kudu/integration-tests/client-stress-test.cc @@ -237,6 +237,10 @@ class ClientStressTest_LowMemory : public ClientStressTest { opts.extra_tserver_flags.push_back(Substitute( "--memory_limit_hard_bytes=$0", kMemLimitBytes)); opts.extra_tserver_flags.emplace_back("--memory_limit_soft_percentage=0"); + // Since --memory_limit_soft_percentage=0, any nonzero block cache usage + // will cause memory pressure. Since that's part of the point of the test, + // we'll allow it. + opts.extra_tserver_flags.push_back("--force_block_cache_capacity"); return opts; } }; http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/src/kudu/integration-tests/raft_consensus-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc index e62da62..09f464f 100644 --- a/src/kudu/integration-tests/raft_consensus-itest.cc +++ b/src/kudu/integration-tests/raft_consensus-itest.cc @@ -1800,6 +1800,10 @@ TEST_F(RaftConsensusITest, TestEarlyCommitDespiteMemoryPressure) { // When using tcmalloc, we set it to 30MB, since we can get accurate process memory // usage statistics. Otherwise, set to only 4MB, since we'll only be throttling based // on our tracked memory. + // Since part of the point of the test is to have memory pressure, don't + // insist the block cache capacity is small compared to the memory pressure + // threshold. + "--force_block_cache_capacity", #ifdef TCMALLOC_ENABLED "--memory_limit_hard_bytes=30000000", #else http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/src/kudu/util/process_memory.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/process_memory.cc b/src/kudu/util/process_memory.cc index 224a656..d2f3653 100644 --- a/src/kudu/util/process_memory.cc +++ b/src/kudu/util/process_memory.cc @@ -178,15 +178,6 @@ void DoInitLimits() { g_pressure_threshold = FLAGS_memory_pressure_percentage * g_hard_limit / 100; g_rand = new ThreadSafeRandom(1); - - LOG(INFO) << StringPrintf("Process hard memory limit is %.6f GB", - (static_cast<float>(g_hard_limit) / (1024.0 * 1024.0 * 1024.0))); - LOG(INFO) << StringPrintf("Process soft memory limit is %.6f GB", - (static_cast<float>(g_soft_limit) / - (1024.0 * 1024.0 * 1024.0))); - LOG(INFO) << StringPrintf("Process memory pressure threshold is %.6f GB", - (static_cast<float>(g_pressure_threshold) / - (1024.0 * 1024.0 * 1024.0))); } void InitLimits() { @@ -228,6 +219,16 @@ int64_t HardLimit() { return g_hard_limit; } +int64_t SoftLimit() { + InitLimits(); + return g_soft_limit; +} + +int64_t MemoryPressureThreshold() { + InitLimits(); + return g_pressure_threshold; +} + bool UnderMemoryPressure(double* current_capacity_pct) { InitLimits(); int64_t consumption = CurrentConsumption(); http://git-wip-us.apache.org/repos/asf/kudu/blob/297b72bd/src/kudu/util/process_memory.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/process_memory.h b/src/kudu/util/process_memory.h index 1545dc7..cba7046 100644 --- a/src/kudu/util/process_memory.h +++ b/src/kudu/util/process_memory.h @@ -44,6 +44,12 @@ int64_t CurrentConsumption(); // Return the configured hard limit for the process. int64_t HardLimit(); +// Return the configured soft limit for the process. +int64_t SoftLimit(); + +// Return the configured memory pressure threshold for the process. +int64_t MemoryPressureThreshold(); + #ifdef TCMALLOC_ENABLED // Get the current amount of allocated memory, according to tcmalloc. //
