IMPALA-5598: Fix excessive dumping in MemLimitExceeded ExecQueryFInstances RPC timeouts in stress tests were traced to excessive dumping in MemLimitExceeded() and LogUsage(). The QueryState::Init() hits the process memory limit, and this causes MemLimitExceeded to dump the process memory tracker. On the stress test, this involves dumping hundreds of queries and all the structures underneath. This is expensive and the contention between threads accessing these structures causes RPC timeouts.
This adds the ability to the limit the recursive depth when dumping memory trackers. It also modifies the dumping in MemLimitExceeded() to have the following semantics: 1. The query memory tracker is always logged in full if it exists. 2. The process memory tracker is logged if the query memory tracker doesn't exist or if the process memory limit is being hit. The process memory tracker is limited to dumping only two levels of children. Other uses of LogUsage() are not limited (e.g. /memz and the query memory page on the web UI). A stress test run with the process memory tracker limited to two levels showed no RPC timeouts. Change-Id: Ib151c3894d4c43b508779e6809d77e2f8af89cf2 Reviewed-on: http://gerrit.cloudera.org:8080/7597 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bc93b169 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bc93b169 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bc93b169 Branch: refs/heads/master Commit: bc93b1690d15d97a70cf5f6886b4969578a44240 Parents: b829570 Author: Joe McDonnell <[email protected]> Authored: Thu Aug 3 16:30:49 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Aug 16 02:04:30 2017 +0000 ---------------------------------------------------------------------- be/src/exec/exec-node.cc | 5 +- be/src/exec/hash-table-test.cc | 2 +- .../bufferpool/reservation-tracker-test.cc | 2 +- be/src/runtime/initial-reservations.cc | 3 +- be/src/runtime/mem-tracker.cc | 50 +++++++++++++------- be/src/runtime/mem-tracker.h | 23 +++++++-- be/src/runtime/runtime-state.cc | 2 +- be/src/service/impala-http-handler.cc | 2 +- be/src/util/default-path-handlers.cc | 3 +- 9 files changed, 61 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/exec/exec-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc index 56a1f10..c3a5f80 100644 --- a/be/src/exec/exec-node.cc +++ b/be/src/exec/exec-node.cc @@ -215,9 +215,10 @@ void ExecNode::Close(RuntimeState* state) { if (mem_tracker_ != nullptr) { if (mem_tracker()->consumption() != 0) { LOG(WARNING) << "Query " << state->query_id() << " may have leaked memory." << endl - << state->instance_mem_tracker()->LogUsage(); + << state->instance_mem_tracker()->LogUsage(MemTracker::UNLIMITED_DEPTH); DCHECK_EQ(mem_tracker()->consumption(), 0) - << "Leaked memory." << endl << state->instance_mem_tracker()->LogUsage(); + << "Leaked memory." << endl + << state->instance_mem_tracker()->LogUsage(MemTracker::UNLIMITED_DEPTH); } mem_tracker_->Close(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/exec/hash-table-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table-test.cc b/be/src/exec/hash-table-test.cc index aad9134..816e6cf 100644 --- a/be/src/exec/hash-table-test.cc +++ b/be/src/exec/hash-table-test.cc @@ -476,7 +476,7 @@ class HashTableTest : public testing::Test { bool success; EXPECT_OK(hash_table->CheckAndResize(num_to_add, ht_ctx.get(), &success)); EXPECT_TRUE(success) << " failed to resize: " << num_to_add << "\n" - << tracker_.LogUsage() << "\n" + << tracker_.LogUsage(MemTracker::UNLIMITED_DEPTH) << "\n" << clients_.back()->DebugString(); for (int j = 0; j < num_to_add; ++build_row_val, ++j) { TupleRow* row = CreateTupleRow(build_row_val); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/runtime/bufferpool/reservation-tracker-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/bufferpool/reservation-tracker-test.cc b/be/src/runtime/bufferpool/reservation-tracker-test.cc index a31d7a9..5d22eb9 100644 --- a/be/src/runtime/bufferpool/reservation-tracker-test.cc +++ b/be/src/runtime/bufferpool/reservation-tracker-test.cc @@ -330,7 +330,7 @@ TEST_F(ReservationTrackerTest, MemTrackerIntegrationMultiLevel) { ASSERT_EQ(amount, mem_trackers[ancestor]->consumption()); } - LOG(INFO) << "\n" << mem_trackers[0]->LogUsage(); + LOG(INFO) << "\n" << mem_trackers[0]->LogUsage(MemTracker::UNLIMITED_DEPTH); reservations[level].Close(); } else { ASSERT_FALSE(increased); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/runtime/initial-reservations.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/initial-reservations.cc b/be/src/runtime/initial-reservations.cc index 88bc920..39810ae 100644 --- a/be/src/runtime/initial-reservations.cc +++ b/be/src/runtime/initial-reservations.cc @@ -54,7 +54,8 @@ Status InitialReservations::Init( return Status(TErrorCode::MINIMUM_RESERVATION_UNAVAILABLE, PrettyPrinter::Print(query_min_reservation, TUnit::BYTES), FLAGS_hostname, FLAGS_be_port, PrintId(query_id), - ExecEnv::GetInstance()->process_mem_tracker()->LogUsage()); + ExecEnv::GetInstance()->process_mem_tracker()->LogUsage( + MemTracker::PROCESS_MEMTRACKER_LIMITED_DEPTH)); } VLOG_QUERY << "Successfully claimed initial reservations (" << PrettyPrinter::Print(query_min_reservation, TUnit::BYTES) << ") for" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/runtime/mem-tracker.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/mem-tracker.cc b/be/src/runtime/mem-tracker.cc index 525d069..445f499 100644 --- a/be/src/runtime/mem-tracker.cc +++ b/be/src/runtime/mem-tracker.cc @@ -115,7 +115,7 @@ void MemTracker::Close() { if (consumption_metric_ == nullptr) { DCHECK_EQ(consumption_->current_value(), 0) << label_ << "\n" << GetStackTrace() << "\n" - << LogUsage(""); + << LogUsage(UNLIMITED_DEPTH); } closed_ = true; } @@ -135,7 +135,7 @@ void MemTracker::EnableReservationReporting(const ReservationTrackerCounters& co int64_t MemTracker::GetPoolMemReserved() { // Pool trackers should have a pool_name_ and no limit. DCHECK(!pool_name_.empty()); - DCHECK_EQ(limit_, -1) << LogUsage(""); + DCHECK_EQ(limit_, -1) << LogUsage(UNLIMITED_DEPTH); int64_t mem_reserved = 0L; lock_guard<SpinLock> l(child_trackers_lock_); @@ -145,7 +145,7 @@ int64_t MemTracker::GetPoolMemReserved() { // Make sure we don't overflow if the query limits are set to ridiculous values. mem_reserved += std::min(child_limit, MemInfo::physical_mem()); } else { - DCHECK_EQ(child_limit, -1) << child->LogUsage(""); + DCHECK_EQ(child_limit, -1) << child->LogUsage(UNLIMITED_DEPTH); mem_reserved += child->consumption(); } } @@ -239,7 +239,8 @@ void MemTracker::RegisterMetrics(MetricGroup* metrics, const string& prefix) { // TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB // Total=6.04 MB Peak=6.45 MB // -string MemTracker::LogUsage(const string& prefix, int64_t* logged_consumption) { +string MemTracker::LogUsage(int max_recursive_depth, const string& prefix, + int64_t* logged_consumption) { // Make sure the consumption is up to date. if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric(); int64_t curr_consumption = consumption(); @@ -273,12 +274,17 @@ string MemTracker::LogUsage(const string& prefix, int64_t* logged_consumption) { ss << " Peak=" << PrettyPrinter::Print(peak_consumption, TUnit::BYTES); } + // This call does not need the children, so return early. + if (max_recursive_depth == 0) return ss.str(); + + // Recurse and get information about the children string new_prefix = Substitute(" $0", prefix); int64_t child_consumption; string child_trackers_usage; { lock_guard<SpinLock> l(child_trackers_lock_); - child_trackers_usage = LogUsage(new_prefix, child_trackers_, &child_consumption); + child_trackers_usage = LogUsage(max_recursive_depth - 1, new_prefix, + child_trackers_, &child_consumption); } if (!child_trackers_usage.empty()) ss << "\n" << child_trackers_usage; @@ -296,13 +302,14 @@ string MemTracker::LogUsage(const string& prefix, int64_t* logged_consumption) { return ss.str(); } -string MemTracker::LogUsage(const string& prefix, const list<MemTracker*>& trackers, - int64_t* logged_consumption) { +string MemTracker::LogUsage(int max_recursive_depth, const string& prefix, + const list<MemTracker*>& trackers, int64_t* logged_consumption) { *logged_consumption = 0; vector<string> usage_strings; for (MemTracker* tracker : trackers) { int64_t tracker_consumption; - string usage_string = tracker->LogUsage(prefix, &tracker_consumption); + string usage_string = tracker->LogUsage(max_recursive_depth, prefix, + &tracker_consumption); if (!usage_string.empty()) usage_strings.push_back(usage_string); *logged_consumption += tracker_consumption; } @@ -328,18 +335,25 @@ Status MemTracker::MemLimitExceeded(RuntimeState* state, const std::string& deta ss << "Memory left in process limit: " << PrettyPrinter::Print(process_capacity, TUnit::BYTES) << endl; - // Choose which tracker to log the usage of. Default to the process tracker so we can - // get the full view of memory consumption. - MemTracker* tracker_to_log = process_tracker; - if (state != nullptr && state->query_mem_tracker()->has_limit()) { + // Always log the query tracker (if available). + if (state != nullptr) { MemTracker* query_tracker = state->query_mem_tracker(); - const int64_t query_capacity = query_tracker->limit() - query_tracker->consumption(); - ss << "Memory left in query limit: " - << PrettyPrinter::Print(query_capacity, TUnit::BYTES) << endl; - // Log the query tracker only if the query limit was closer to being exceeded. - if (query_capacity < process_capacity) tracker_to_log = query_tracker; + if (query_tracker->has_limit()) { + const int64_t query_capacity = query_tracker->limit() - query_tracker->consumption(); + ss << "Memory left in query limit: " + << PrettyPrinter::Print(query_capacity, TUnit::BYTES) << endl; + } + ss << query_tracker->LogUsage(UNLIMITED_DEPTH); } - ss << tracker_to_log->LogUsage(); + + // Log the process level if the process tracker is close to the limit or + // if the query tracker was not available. + if (process_capacity < failed_allocation_size || state == nullptr) { + // IMPALA-5598: For performance reasons, limit the levels of recursion when + // dumping the process tracker to only two layers. + ss << process_tracker->LogUsage(PROCESS_MEMTRACKER_LIMITED_DEPTH); + } + Status status = Status::MemLimitExceeded(ss.str()); if (state != nullptr) state->LogError(status.msg()); return status; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/runtime/mem-tracker.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/mem-tracker.h b/be/src/runtime/mem-tracker.h index 8d3d645..0d2f4d7 100644 --- a/be/src/runtime/mem-tracker.h +++ b/be/src/runtime/mem-tracker.h @@ -229,7 +229,7 @@ class MemTracker { /// consistent.) if (tracker->consumption_metric_ == NULL) { DCHECK_GE(tracker->consumption_->current_value(), 0) - << std::endl << tracker->LogUsage(); + << std::endl << tracker->LogUsage(UNLIMITED_DEPTH); } } } @@ -324,12 +324,24 @@ class MemTracker { /// "<prefix>.<metric name>". void RegisterMetrics(MetricGroup* metrics, const std::string& prefix); - /// Logs the usage of this tracker and all of its children (recursively). + /// Logs the usage of this tracker and optionally its children (recursively). /// If 'logged_consumption' is non-NULL, sets the consumption value logged. + /// 'max_recursive_depth' specifies the maximum number of levels of children + /// to include in the dump. If it is zero, then no children are dumped. + /// Limiting the recursive depth reduces the cost of dumping, particularly + /// for the process MemTracker. /// TODO: once all memory is accounted in ReservationTracker hierarchy, move /// reporting there. - std::string LogUsage( + std::string LogUsage(int max_recursive_depth, const std::string& prefix = "", int64_t* logged_consumption = nullptr); + /// Dumping the process MemTracker is expensive. Limiting the recursive depth + /// to two levels limits the level of detail to a one-line summary for each query + /// MemTracker, avoiding all MemTrackers below that level. This provides a summary + /// of process usage with substantially lower cost than the full dump. + static const int PROCESS_MEMTRACKER_LIMITED_DEPTH = 2; + /// Unlimited dumping is useful for query memtrackers or error conditions that + /// are not performance sensitive + static const int UNLIMITED_DEPTH = INT_MAX; /// Log the memory usage when memory limit is exceeded and return a status object with /// details of the allocation which caused the limit to be exceeded. @@ -358,8 +370,9 @@ class MemTracker { void AddChildTracker(MemTracker* tracker); /// Log consumption of all the trackers provided. Returns the sum of consumption in - /// 'logged_consumption'. - static std::string LogUsage(const std::string& prefix, + /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels + /// of children to include in the dump. If it is zero, then no children are dumped. + static std::string LogUsage(int max_recursive_depth, const std::string& prefix, const std::list<MemTracker*>& trackers, int64_t* logged_consumption); /// Lock to protect GcMemory(). This prevents many GCs from occurring at once. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/runtime/runtime-state.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc index b10a9e5..fd1c061 100644 --- a/be/src/runtime/runtime-state.cc +++ b/be/src/runtime/runtime-state.cc @@ -243,7 +243,7 @@ void RuntimeState::ReleaseResources() { // No more memory should be tracked for this instance at this point. if (instance_mem_tracker_->consumption() != 0) { LOG(WARNING) << "Query " << query_id() << " may have leaked memory." << endl - << instance_mem_tracker_->LogUsage(); + << instance_mem_tracker_->LogUsage(MemTracker::UNLIMITED_DEPTH); } instance_mem_tracker_->Close(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/service/impala-http-handler.cc ---------------------------------------------------------------------- diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc index e6d5323..5a79d9d 100644 --- a/be/src/service/impala-http-handler.cc +++ b/be/src/service/impala-http-handler.cc @@ -261,7 +261,7 @@ void ImpalaHttpHandler::QueryMemoryHandler(const Webserver::ArgumentMap& args, string mem_usage_text; // Only in-flight queries have a MemTracker to get usage from. if (qs.get() != nullptr) { - mem_usage_text = qs->query_mem_tracker()->LogUsage(); + mem_usage_text = qs->query_mem_tracker()->LogUsage(MemTracker::UNLIMITED_DEPTH); } else { mem_usage_text = "The query is finished, current memory consumption is not available."; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bc93b169/be/src/util/default-path-handlers.cc ---------------------------------------------------------------------- diff --git a/be/src/util/default-path-handlers.cc b/be/src/util/default-path-handlers.cc index 17d2c94..9334316 100644 --- a/be/src/util/default-path-handlers.cc +++ b/be/src/util/default-path-handlers.cc @@ -147,7 +147,8 @@ void MemUsageHandler(MemTracker* mem_tracker, MetricGroup* metric_group, document->AddMember("overview", overview, document->GetAllocator()); // Dump all mem trackers. - Value detailed(mem_tracker->LogUsage().c_str(), document->GetAllocator()); + Value detailed(mem_tracker->LogUsage(MemTracker::UNLIMITED_DEPTH).c_str(), + document->GetAllocator()); document->AddMember("detailed", detailed, document->GetAllocator()); Value systeminfo(MemInfo::DebugString().c_str(), document->GetAllocator());
