This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e1db9450ca2 [fix](memory) Allows to enable memory tracker accuracy
detection via Config (#40714)
e1db9450ca2 is described below
commit e1db9450ca2cd42f2a3bf7379de0f156f1ee411e
Author: Xinyi Zou <[email protected]>
AuthorDate: Fri Sep 13 12:11:04 2024 +0800
[fix](memory) Allows to enable memory tracker accuracy detection via Config
(#40714)
Add `crash_in_memory_tracker_inaccurate`. If the memory tracker value is
inaccurate, BE will crash. This is usually used in test environments; the
default value is false.
---
be/src/common/config.cpp | 3 +++
be/src/common/config.h | 4 ++++
be/src/runtime/memory/mem_tracker_limiter.cpp | 23 +++++++++++-----------
be/src/runtime/memory/mem_tracker_limiter.h | 7 ++-----
.../pipeline/cloud_p0/conf/be_custom.conf | 3 ++-
.../pipeline/cloud_p1/conf/be_custom.conf | 3 ++-
regression-test/pipeline/external/conf/be.conf | 3 ++-
regression-test/pipeline/p0/conf/be.conf | 3 ++-
regression-test/pipeline/p1/conf/be.conf | 1 +
9 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 06144dd3142..68e630fe830 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -150,6 +150,9 @@ DEFINE_mInt64(stacktrace_in_alloc_large_memory_bytes,
"2147483648");
DEFINE_mInt64(crash_in_alloc_large_memory_bytes, "-1");
+// If memory tracker value is inaccurate, BE will crash. usually used in test
environments, default value is false.
+DEFINE_mBool(crash_in_memory_tracker_inaccurate, "false");
+
// default is true. if any memory tracking in Orphan mem tracker will report
error.
// !! not modify the default value of this conf!! otherwise memory errors
cannot be detected in time.
// allocator free memory not need to check, because when the thread memory
tracker label is Orphan,
diff --git a/be/src/common/config.h b/be/src/common/config.h
index cc26f52abba..f242a7ef512 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -194,11 +194,15 @@ DECLARE_mBool(enable_stacktrace);
// if alloc failed using Doris Allocator, will print stacktrace in error log.
// if is -1, disable print stacktrace when alloc large memory.
DECLARE_mInt64(stacktrace_in_alloc_large_memory_bytes);
+
// when alloc memory larger than crash_in_alloc_large_memory_bytes will crash,
default -1 means disabled.
// if you need a core dump to analyze large memory allocation,
// modify this parameter to crash when large memory allocation occur will help
DECLARE_mInt64(crash_in_alloc_large_memory_bytes);
+// If memory tracker value is inaccurate, BE will crash. usually used in test
environments, default value is false.
+DECLARE_mBool(crash_in_memory_tracker_inaccurate);
+
// default is true. if any memory tracking in Orphan mem tracker will report
error.
// !! not modify the default value of this conf!! otherwise memory errors
cannot be detected in time.
// allocator free memory not need to check, because when the thread memory
tracker label is Orphan,
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 85cceb365c8..a1eb2ed67d3 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -108,6 +108,12 @@ std::shared_ptr<MemTrackerLimiter>
MemTrackerLimiter::create_shared(MemTrackerLi
return tracker;
}
+bool MemTrackerLimiter::open_memory_tracker_inaccurate_detect() {
+ return doris::config::crash_in_memory_tracker_inaccurate &&
+ (_type == Type::COMPACTION || _type == Type::SCHEMA_CHANGE || _type
== Type::QUERY ||
+ (_type == Type::LOAD && !is_group_commit_load));
+}
+
MemTrackerLimiter::~MemTrackerLimiter() {
consume(_untracked_mem);
static std::string mem_tracker_inaccurate_msg =
@@ -127,35 +133,29 @@ MemTrackerLimiter::~MemTrackerLimiter() {
"4. If you need to "
"transfer memory tracking value between two trackers, can use
transfer_to.";
if (_consumption->current_value() != 0) {
-// TODO, expect mem tracker equal to 0 at the load/compaction/etc. task end.
-#ifndef NDEBUG
- if (_type == Type::COMPACTION || _type == Type::SCHEMA_CHANGE || _type
== Type::QUERY ||
- (_type == Type::LOAD && !is_group_commit_load)) {
+ if (open_memory_tracker_inaccurate_detect()) {
std::string err_msg =
fmt::format("mem tracker label: {}, consumption: {}, peak
consumption: {}, {}.",
label(), _consumption->current_value(),
_consumption->peak_value(),
mem_tracker_inaccurate_msg);
LOG(FATAL) << err_msg << print_address_sanitizers();
}
-#endif
if (ExecEnv::tracking_memory()) {
ExecEnv::GetInstance()->orphan_mem_tracker()->consume(_consumption->current_value());
}
_consumption->set(0);
-#ifndef NDEBUG
- } else if (!_address_sanitizers.empty() && !is_group_commit_load) {
+ } else if (doris::config::crash_in_memory_tracker_inaccurate &&
!_address_sanitizers.empty() &&
+ !is_group_commit_load) {
LOG(FATAL) << "[Address Sanitizer] consumption is 0, but address
sanitizers not empty. "
<< ", mem tracker label: " << _label
<< ", peak consumption: " << _consumption->peak_value()
<< print_address_sanitizers();
-#endif
}
memory_memtrackerlimiter_cnt << -1;
}
-#ifndef NDEBUG
void MemTrackerLimiter::add_address_sanitizers(void* buf, size_t size) {
- if (_type == Type::QUERY || (_type == Type::LOAD &&
!is_group_commit_load)) {
+ if (open_memory_tracker_inaccurate_detect()) {
std::lock_guard<std::mutex> l(_address_sanitizers_mtx);
auto it = _address_sanitizers.find(buf);
if (it != _address_sanitizers.end()) {
@@ -177,7 +177,7 @@ void MemTrackerLimiter::add_address_sanitizers(void* buf,
size_t size) {
}
void MemTrackerLimiter::remove_address_sanitizers(void* buf, size_t size) {
- if (_type == Type::QUERY || (_type == Type::LOAD &&
!is_group_commit_load)) {
+ if (open_memory_tracker_inaccurate_detect()) {
std::lock_guard<std::mutex> l(_address_sanitizers_mtx);
auto it = _address_sanitizers.find(buf);
if (it != _address_sanitizers.end()) {
@@ -221,7 +221,6 @@ std::string MemTrackerLimiter::print_address_sanitizers() {
}
return detail;
}
-#endif
MemTracker::Snapshot MemTrackerLimiter::make_snapshot() const {
Snapshot snapshot;
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h
b/be/src/runtime/memory/mem_tracker_limiter.h
index 344f3dc92b6..c8a8c845793 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -205,12 +205,9 @@ public:
// Log the memory usage when memory limit is exceeded.
std::string tracker_limit_exceeded_str();
-#ifndef NDEBUG
void add_address_sanitizers(void* buf, size_t size);
void remove_address_sanitizers(void* buf, size_t size);
- std::string print_address_sanitizers();
bool is_group_commit_load {false};
-#endif
std::string debug_string() override {
std::stringstream msg;
@@ -253,16 +250,16 @@ private:
bool _enable_print_log_usage = false;
static std::atomic<bool> _enable_print_log_process_usage;
-#ifndef NDEBUG
struct AddressSanitizer {
size_t size;
std::string stack_trace;
};
+ std::string print_address_sanitizers();
+ bool open_memory_tracker_inaccurate_detect();
std::mutex _address_sanitizers_mtx;
std::unordered_map<void*, AddressSanitizer> _address_sanitizers;
std::vector<std::string> _error_address_sanitizers;
-#endif
};
inline int64_t MemTrackerLimiter::add_untracked_mem(int64_t bytes) {
diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf
b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
index c8f41b100a7..377a02536c6 100644
--- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf
+++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
@@ -35,4 +35,5 @@ stream_load_record_batch_size = 500
webserver_num_workers = 128
enable_new_tablet_do_compaction = true
arrow_flight_sql_port = 8181
-pipeline_task_leakage_detect_period_sec=1
\ No newline at end of file
+pipeline_task_leakage_detect_period_sec=1
+crash_in_memory_tracker_inaccurate = true
diff --git a/regression-test/pipeline/cloud_p1/conf/be_custom.conf
b/regression-test/pipeline/cloud_p1/conf/be_custom.conf
index b0649875178..0dc78140ed9 100644
--- a/regression-test/pipeline/cloud_p1/conf/be_custom.conf
+++ b/regression-test/pipeline/cloud_p1/conf/be_custom.conf
@@ -30,4 +30,5 @@ file_cache_path =
[{"path":"/data/doris_cloud/file_cache","total_size":104857600
tmp_file_dirs =
[{"path":"/data/doris_cloud/tmp","max_cache_bytes":104857600,"max_upload_bytes":104857600}]
save_load_error_log_to_s3 = true
arrow_flight_sql_port = 8181
-pipeline_task_leakage_detect_period_sec=1
\ No newline at end of file
+pipeline_task_leakage_detect_period_sec=1
+crash_in_memory_tracker_inaccurate = true
diff --git a/regression-test/pipeline/external/conf/be.conf
b/regression-test/pipeline/external/conf/be.conf
index a7edbd7b55a..19ebc9ee812 100644
--- a/regression-test/pipeline/external/conf/be.conf
+++ b/regression-test/pipeline/external/conf/be.conf
@@ -66,4 +66,5 @@ enable_jvm_monitor = true
KRB5_CONFIG=/keytabs/krb5.conf
kerberos_krb5_conf_path=/keytabs/krb5.conf
-pipeline_task_leakage_detect_period_sec=1
\ No newline at end of file
+pipeline_task_leakage_detect_period_sec=1
+crash_in_memory_tracker_inaccurate = true
diff --git a/regression-test/pipeline/p0/conf/be.conf
b/regression-test/pipeline/p0/conf/be.conf
index e4745ccb5a3..c5c8104ecf1 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -69,4 +69,5 @@ enable_jvm_monitor = true
enable_be_proc_monitor = true
be_proc_monitor_interval_ms = 30000
webserver_num_workers = 128
-pipeline_task_leakage_detect_period_sec=1
\ No newline at end of file
+pipeline_task_leakage_detect_period_sec=1
+crash_in_memory_tracker_inaccurate = true
diff --git a/regression-test/pipeline/p1/conf/be.conf
b/regression-test/pipeline/p1/conf/be.conf
index 1c0fd53d495..01510e6422b 100644
--- a/regression-test/pipeline/p1/conf/be.conf
+++ b/regression-test/pipeline/p1/conf/be.conf
@@ -62,3 +62,4 @@ enable_missing_rows_correctness_check=true
enable_jvm_monitor = true
pipeline_task_leakage_detect_period_sec=1
+crash_in_memory_tracker_inaccurate = true
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]