IMPALA-5661: buffer pool limit Adds the --buffer_pool_limit flag to control the buffer pool size. It can be specified as either an absolute memory value or a percentage of the process memory limit
Testing: Started up a cluster with --buffer_pool_limit=10%, confirmed via /metrics page that the buffer pool limit was reduced to ~800MB on my system. Change-Id: Ia64e21e0d5a7cf35a9064f365c6c86db13fbd73d Reviewed-on: http://gerrit.cloudera.org:8080/7462 Reviewed-by: Matthew Jacobs <m...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f14e68c7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f14e68c7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f14e68c7 Branch: refs/heads/master Commit: f14e68c7255927a595b5fc017cc86960f59bc8df Parents: 68df21b Author: Tim Armstrong <tarmstr...@cloudera.com> Authored: Tue Jul 18 16:57:30 2017 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Tue Aug 8 10:45:33 2017 +0000 ---------------------------------------------------------------------- be/src/common/constant-strings.h | 31 ++++++++++++++++++++++++++ be/src/common/global-flags.cc | 22 ++++++++++++++---- be/src/runtime/exec-env.cc | 19 ++++++++++------ be/src/scheduling/request-pool-service.cc | 13 ++++++----- 4 files changed, 68 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/common/constant-strings.h ---------------------------------------------------------------------- diff --git a/be/src/common/constant-strings.h b/be/src/common/constant-strings.h new file mode 100644 index 0000000..66f0b0b --- /dev/null +++ b/be/src/common/constant-strings.h @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file includes constant strings that are used in multiple places in the codebase. + +#ifndef IMPALA_COMMON_CONSTANT_STRINGS_H_ +#define IMPALA_COMMON_CONSTANT_STRINGS_H_ + +// Template for a description of the ways to specify bytes. strings::Substitute() must +// used to fill in the blanks. +#define MEM_UNITS_HELP_MSG "Specified as number of bytes ('<int>[bB]?'), " \ + "megabytes ('<float>[mM]'), " \ + "gigabytes ('<float>[gG]'), " \ + "or percentage of $0 ('<int>%'). " \ + "Defaults to bytes if no unit is given." + +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/common/global-flags.cc ---------------------------------------------------------------------- diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc index 0a6ca28..98417f2 100644 --- a/be/src/common/global-flags.cc +++ b/be/src/common/global-flags.cc @@ -21,7 +21,13 @@ // a main()), or flags that are referenced from multiple places and having them here // calms the linker errors that would otherwise ensue. +#include <string> + +#include "common/constant-strings.h" #include "common/logging.h" +#include "gutil/strings/substitute.h" + +#include "common/names.h" // This will be defaulted to the host name returned by the OS. // This name is used in the principal generated for Kerberos authorization. @@ -41,10 +47,18 @@ DEFINE_string(krb5_conf, "", "Absolute path to Kerberos krb5.conf if in a non-st "location. Does not normally need to be set."); DEFINE_string(krb5_debug_file, "", "Turn on Kerberos debugging and output to this file"); -DEFINE_string(mem_limit, "80%", "Process memory limit specified as number of bytes " - "('<int>[bB]?'), megabytes ('<float>[mM]'), gigabytes ('<float>[gG]'), " - "or percentage of the physical memory ('<int>%'). " - "Defaults to bytes if no unit is given"); +static const string mem_limit_help_msg = "Limit on process memory consumption, " + "excluding the JVM's memory consumption. " + + Substitute(MEM_UNITS_HELP_MSG, "the physical memory"); +DEFINE_string(mem_limit, "80%", mem_limit_help_msg.c_str()); + +static const string buffer_pool_limit_help_msg = "(Advanced) Limit on buffer pool size. " + + Substitute(MEM_UNITS_HELP_MSG, "the process memory limit") + " " + "The default value and behaviour of this flag may change between releases."; +DEFINE_string(buffer_pool_limit, "80%", buffer_pool_limit_help_msg.c_str()); + +DEFINE_int64(min_buffer_size, 64 * 1024, + "(Advanced) The minimum buffer size to use in the buffer pool"); DEFINE_bool(enable_process_lifetime_heap_profiling, false, "(Advanced) Enables heap " "profiling for the lifetime of the process. Profile output will be stored in the " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/runtime/exec-env.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/exec-env.cc b/be/src/runtime/exec-env.cc index f2ee6f0..8239a68 100644 --- a/be/src/runtime/exec-env.cc +++ b/be/src/runtime/exec-env.cc @@ -75,14 +75,14 @@ DEFINE_int32(state_store_subscriber_port, 23000, DEFINE_int32(num_hdfs_worker_threads, 16, "(Advanced) The number of threads in the global HDFS operation pool"); DEFINE_bool(disable_admission_control, false, "Disables admission control."); -DEFINE_int64(min_buffer_size, 64 * 1024, - "(Advanced) The minimum buffer size to use in the buffer pool"); DECLARE_int32(state_store_port); DECLARE_int32(num_threads_per_core); DECLARE_int32(num_cores); DECLARE_int32(be_port); DECLARE_string(mem_limit); +DECLARE_string(buffer_pool_limit); +DECLARE_int64(min_buffer_size); DECLARE_bool(is_coordinator); DECLARE_int32(webserver_port); @@ -241,9 +241,14 @@ Status ExecEnv::StartServices() { return Status(Substitute( "--min_buffer_size must be a power-of-two: $0", FLAGS_min_buffer_size)); } - int64_t buffer_pool_capacity = BitUtil::RoundDown( - no_process_mem_limit ? system_mem : bytes_limit * 4 / 5, FLAGS_min_buffer_size); - InitBufferPool(FLAGS_min_buffer_size, buffer_pool_capacity); + int64_t buffer_pool_limit = ParseUtil::ParseMemSpec(FLAGS_buffer_pool_limit, + &is_percent, no_process_mem_limit ? system_mem : bytes_limit); + if (buffer_pool_limit <= 0) { + return Status(Substitute("Invalid --buffer_pool_limit value, must be a percentage or " + "positive bytes value: $0", FLAGS_buffer_pool_limit)); + } + buffer_pool_limit = BitUtil::RoundDown(buffer_pool_limit, FLAGS_min_buffer_size); + InitBufferPool(FLAGS_min_buffer_size, buffer_pool_limit); metrics_->Init(enable_webserver_ ? webserver_.get() : nullptr); impalad_client_cache_->InitMetrics(metrics_.get(), "impala-server.backends"); @@ -282,8 +287,8 @@ Status ExecEnv::StartServices() { } LOG(INFO) << "Using global memory limit: " << PrettyPrinter::Print(bytes_limit, TUnit::BYTES); - LOG(INFO) << "Buffer pool capacity: " - << PrettyPrinter::Print(buffer_pool_capacity, TUnit::BYTES); + LOG(INFO) << "Buffer pool limit: " + << PrettyPrinter::Print(buffer_pool_limit, TUnit::BYTES); RETURN_IF_ERROR(disk_io_mgr_->Init(mem_tracker_.get())); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/scheduling/request-pool-service.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/request-pool-service.cc b/be/src/scheduling/request-pool-service.cc index b674bf0..bad5ac1 100644 --- a/be/src/scheduling/request-pool-service.cc +++ b/be/src/scheduling/request-pool-service.cc @@ -23,6 +23,7 @@ #include <string> #include <gutil/strings/substitute.h> +#include "common/constant-strings.h" #include "common/logging.h" #include "rpc/jni-thrift-util.h" #include "service/query-options.h" @@ -56,12 +57,12 @@ DEFINE_int64(default_pool_max_requests, -1, "Maximum number of concurrent outsta "requests allowed to run before queueing incoming requests. A negative value " "indicates no limit. 0 indicates no requests will be admitted. Ignored if " "fair_scheduler_config_path and llama_site_path are set."); -DEFINE_string(default_pool_mem_limit, "", "Maximum amount of memory that all " - "outstanding requests in this pool may use before new requests to this pool" - " are queued. Specified as a number of bytes ('<int>[bB]?'), megabytes " - "('<float>[mM]'), gigabytes ('<float>[gG]'), or percentage of the physical memory " - "('<int>%'). 0 or not setting indicates no limit. Defaults to bytes if no unit is " - "given. Ignored if fair_scheduler_config_path and llama_site_path are set."); + +static const string default_pool_mem_limit_help_msg = "Maximum amount of memory that all " + "outstanding requests in this pool may use before new requests to this pool " + "are queued. " + Substitute(MEM_UNITS_HELP_MSG, "the physical memory") + " " + "Ignored if fair_scheduler_config_path and llama_site_path are set."; +DEFINE_string(default_pool_mem_limit, "", default_pool_mem_limit_help_msg.c_str()); DEFINE_int64(default_pool_max_queued, 200, "Maximum number of requests allowed to be " "queued before rejecting requests. A negative value or 0 indicates requests " "will always be rejected once the maximum number of concurrent requests are "