This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 01d012bab7 [fix](memory) Remove page cache regular clear, disabled
jemalloc prof by default (#18218)
01d012bab7 is described below
commit 01d012bab7ab0c698257991953e2ff5f6ecce775
Author: Xinyi Zou <[email protected]>
AuthorDate: Thu Mar 30 09:39:37 2023 +0800
[fix](memory) Remove page cache regular clear, disabled jemalloc prof by
default (#18218)
Remove page cache regular clear
The page cache is now turned off by default. If a user manually enables the
page cache, we can assume the user accepts its memory usage; adding a manual
clear command for the cache can be considered later.
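Fix memory GC cancelling the top memory query: drop the 100M small-query
exemption in free_top_memory_query and lower it to 32M in
free_top_overcommit_query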
jemalloc prof is not enabled by default
---
be/src/common/config.h | 4 ---
be/src/common/daemon.cpp | 12 ---------
be/src/runtime/memory/mem_tracker_limiter.cpp | 5 +---
bin/start_be.sh | 2 +-
docs/en/community/developer-guide/be-vscode-dev.md | 2 ++
docs/en/community/developer-guide/debug-tool.md | 31 ++++++++++++----------
.../community/developer-guide/be-vscode-dev.md | 2 ++
docs/zh-CN/community/developer-guide/debug-tool.md | 11 ++++----
8 files changed, 29 insertions(+), 40 deletions(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index aeb565ee79..9ed5f36482 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -77,10 +77,6 @@ CONF_Int64(max_sys_mem_available_low_water_mark_bytes,
"1717986918");
// The size of the memory that gc wants to release each time, as a percentage
of the mem limit.
CONF_mString(process_minor_gc_size, "10%");
CONF_mString(process_full_gc_size, "20%");
-// Some caches have their own gc threads, such as segment cache.
-// For caches that do not have a separate gc thread, perform regular gc in the
memory maintenance thread.
-// Currently only storage page cache, chunk allocator, more in the future.
-CONF_mInt32(cache_gc_interval_s, "60");
// If true, when the process does not exceed the soft mem limit, the query
memory will not be limited;
// when the process memory exceeds the soft mem limit, the query with the
largest ratio between the currently
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index c388fac9aa..9b7ee5dddc 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -205,10 +205,8 @@ void Daemon::memory_maintenance_thread() {
void Daemon::memory_gc_thread() {
int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms;
- int32_t cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
int32_t memory_minor_gc_sleep_time_ms = 0;
int32_t memory_full_gc_sleep_time_ms = 0;
- int64_t cache_gc_freed_mem = 0;
while (!_stop_background_threads_latch.wait_for(
std::chrono::milliseconds(interval_milliseconds))) {
if (!MemInfo::initialized() || !ExecEnv::GetInstance()->initialized())
{
@@ -221,7 +219,6 @@ void Daemon::memory_gc_thread() {
// No longer full gc and minor gc during sleep.
memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_s *
1000;
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s *
1000;
- cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
doris::MemTrackerLimiter::print_log_process_usage("process full
gc", false);
if (doris::MemInfo::process_full_gc()) {
// If there is not enough memory to be gc, the process memory
usage will not be printed in the next continuous gc.
@@ -234,7 +231,6 @@ void Daemon::memory_gc_thread() {
doris::MemInfo::soft_mem_limit())) {
// No minor gc during sleep, but full gc is possible.
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s *
1000;
- cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
doris::MemTrackerLimiter::print_log_process_usage("process minor
gc", false);
if (doris::MemInfo::process_minor_gc()) {
doris::MemTrackerLimiter::enable_print_log_process_usage();
@@ -246,14 +242,6 @@ void Daemon::memory_gc_thread() {
if (memory_minor_gc_sleep_time_ms > 0) {
memory_minor_gc_sleep_time_ms -= interval_milliseconds;
}
- cache_gc_interval_ms -= interval_milliseconds;
- if (cache_gc_interval_ms < 0) {
- cache_gc_freed_mem = 0;
- doris::MemInfo::process_cache_gc(cache_gc_freed_mem);
- LOG(INFO) << fmt::format("Process regular GC Cache, Free
Memory {} Bytes",
- cache_gc_freed_mem);
- cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
- }
}
}
}
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 9015485b8e..f817da9d78 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -305,9 +305,6 @@ int64_t MemTrackerLimiter::free_top_memory_query(int64_t
min_free_mem,
std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[i].group_lock);
for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
if (tracker->type() == type) {
- if (tracker->consumption() <= 104857600) { // 100M small query
does not cancel
- continue;
- }
if (ExecEnv::GetInstance()->fragment_mgr()->query_is_canceled(
label_to_queryid(tracker->label()))) {
continue;
@@ -351,7 +348,7 @@ int64_t
MemTrackerLimiter::free_top_overcommit_query(int64_t min_free_mem,
std::lock_guard<std::mutex> l(mem_tracker_limiter_pool[i].group_lock);
for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
if (tracker->type() == type) {
- if (tracker->consumption() <= 104857600) { // 100M small query
does not cancel
+ if (tracker->consumption() <= 33554432) { // 32M small query
does not cancel
continue;
}
if (ExecEnv::GetInstance()->fragment_mgr()->query_is_canceled(
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 199990a4b2..7204d65114 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -236,7 +236,7 @@ if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
fi
# see
https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
-export
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof:true,prof_prefix:jeprof.out"
+export
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
if [[ "${RUN_DAEMON}" -eq 1 ]]; then
nohup ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/doris_be" "$@"
>>"${LOG_DIR}/be.out" 2>&1 </dev/null &
diff --git a/docs/en/community/developer-guide/be-vscode-dev.md
b/docs/en/community/developer-guide/be-vscode-dev.md
index 6552d5c8d7..db02f6df3e 100644
--- a/docs/en/community/developer-guide/be-vscode-dev.md
+++ b/docs/en/community/developer-guide/be-vscode-dev.md
@@ -207,6 +207,8 @@ In the configuration **"request": "attach", "processId":
PID**, these two config
ps -ef | grep palo*
```
+Alternatively, set **"processId": "${command:pickProcess}"** to choose the
process to attach to when the attach session starts.
+
As shown in the figure:

diff --git a/docs/en/community/developer-guide/debug-tool.md
b/docs/en/community/developer-guide/debug-tool.md
index 96c705b454..9d14782ab5 100644
--- a/docs/en/community/developer-guide/debug-tool.md
+++ b/docs/en/community/developer-guide/debug-tool.md
@@ -238,7 +238,7 @@ From the above output, we can see that 1024 bytes have been
leaked, and the stac
#### JEMALLOC HEAP PROFILE
##### 1. runtime heap dump by http
-No need to restart BE, use jemalloc heap dump http interface, jemalloc
generates heap dump file on the corresponding BE machine according to the
current memory usage.
+Add `,prof:true,lg_prof_sample:10` to `JEMALLOC_CONF` in `start_be.sh` and
restart BE, then use the jemalloc heap dump http interface to generate a heap
dump file on the corresponding BE machine.
The directory where the heap dump file is located can be configured through
the ``jeprofile_dir`` variable in ``be.conf``, and the default is
``${DORIS_HOME}/log``
@@ -246,21 +246,12 @@ The directory where the heap dump file is located can be
configured through the
curl http://be_host:be_webport/jeheap/dump
```
-##### 2. heap dump by JEMALLOC_CONF
-Perform heap dump by restarting BE after changing the `JEMALLOC_CONF` variable
in `start_be.sh`
-
-1. Dump every 1MB:
-
- Two new variable settings `prof:true,lg_prof_interval:20` have been added
to the `JEMALLOC_CONF` variable, where `prof:true` is to enable profiling, and
`lg_prof_interval:20` means that a dump is generated every 1MB (2^20)
-2. Dump each time a new high is reached:
-
- Added two variable settings `prof:true,prof_gdump:true` in the
`JEMALLOC_CONF` variable, where `prof:true` is to enable profiling, and
`prof_gdump:true` means to generate a dump when the memory usage reaches a new
high
-3. Memory leak dump when the program exits:
-
- Added three new variable settings `prof_leak: true, lg_prof_sample: 0,
prof_final: true` in the `JEMALLOC_CONF` variable
+`prof`: When enabled, jemalloc can generate a heap dump file according to
the current memory usage. Heap profile sampling has a small performance cost,
so it can be turned off during performance testing.
+`lg_prof_sample`: The heap profile sampling interval (log base 2, in bytes).
The default value is 19, i.e. a sampling interval of 512K (2^19 B), which
means the heap profile usually records only about 10% of the memory.
`lg_prof_sample:10` reduces the sampling interval to 1K (2^10 B). More
frequent sampling brings the heap profile closer to real memory usage, but at
a greater performance cost.
+For detailed parameter description, refer to
https://linux.die.net/man/3/jemalloc.
-#### 3. jemalloc heap dump profiling
+#### 2. jemalloc heap dump profiling
3.1 Generating plain text analysis results
```shell
@@ -287,6 +278,18 @@ jeprof lib/doris_be --base=heap_dump_file_1
heap_dump_file_2
In the above jeprof related commands, remove the `--base` option to analyze
only a single heap dump file
+##### 3. heap dump by JEMALLOC_CONF
+Perform heap dump by restarting BE after changing the `JEMALLOC_CONF` variable
in `start_be.sh`
+
+1. Dump every 1MB:
+
+ Two new variable settings `prof:true,lg_prof_interval:20` have been added
to the `JEMALLOC_CONF` variable, where `prof:true` is to enable profiling, and
`lg_prof_interval:20` means that a dump is generated every 1MB (2^20)
+2. Dump each time a new high is reached:
+
+ Added two variable settings `prof:true,prof_gdump:true` in the
`JEMALLOC_CONF` variable, where `prof:true` is to enable profiling, and
`prof_gdump:true` means to generate a dump when the memory usage reaches a new
high
+3. Memory leak dump when the program exits:
+
+ Added three new variable settings `prof_leak: true, lg_prof_sample: 0,
prof_final: true` in the `JEMALLOC_CONF` variable
#### ASAN
diff --git a/docs/zh-CN/community/developer-guide/be-vscode-dev.md
b/docs/zh-CN/community/developer-guide/be-vscode-dev.md
index 4bcaf5d407..338413dc38 100644
--- a/docs/zh-CN/community/developer-guide/be-vscode-dev.md
+++ b/docs/zh-CN/community/developer-guide/be-vscode-dev.md
@@ -206,6 +206,8 @@ mkdir -p /soft/be/storage
ps -ef | grep palo*
```
+或者写作 **"processId": "${command:pickProcess}"**,可在启动attach时指定pid.
+
如图:

diff --git a/docs/zh-CN/community/developer-guide/debug-tool.md
b/docs/zh-CN/community/developer-guide/debug-tool.md
index a1b4e24947..29278d1757 100644
--- a/docs/zh-CN/community/developer-guide/debug-tool.md
+++ b/docs/zh-CN/community/developer-guide/debug-tool.md
@@ -202,7 +202,7 @@ Total: 1296.4 MB
#### JEMALLOC HEAP PROFILE
##### 1. runtime heap dump by http
-无需重启BE, 使用jemalloc heap dump http接口,jemalloc根据当前内存使用情况,在对应的BE机器上生成heap dump文件。
+在`start_be.sh` 中`JEMALLOC_CONF` 增加 `,prof:true,lg_prof_sample:10`
并重启BE,然后使用jemalloc heap dump http接口,在对应的BE机器上生成heap dump文件。
heap dump文件所在目录可以在 ``be.conf``
中通过``jeprofile_dir``变量进行配置,默认为``${DORIS_HOME}/log``
@@ -210,6 +210,11 @@ heap dump文件所在目录可以在 ``be.conf`` 中通过``jeprofile_dir``变
curl http://be_host:be_webport/jeheap/dump
```
+`prof`: 打开后jemalloc将根据当前内存使用情况生成heap dump文件,heap profile采样存在少量性能损耗,性能测试时可关闭。
+`lg_prof_sample`: heap profile采样间隔,默认值19,即默认采样间隔为512K(2^19 B),这会导致heap
profile记录的内存通常只有10%,`lg_prof_sample:10`可以减少采样间隔到1K (2^10 B), 更频繁的采样会使heap
profile接近真实内存,但这会带来更大的性能损耗。
+
+详细参数说明参考 https://linux.die.net/man/3/jemalloc。
+
#### 2. jemalloc heap dump profiling
1. 单个heap dump文件生成纯文本分析结果
@@ -240,10 +245,6 @@ curl http://be_host:be_webport/jeheap/dump
jeprof --pdf lib/doris_be --base=heap_dump_file_1 heap_dump_file_2 >
result.pdf
```
-默认heap profile采样间隔512K(2^19 B),这会导致heap
profile记录的内存通常只有10%,通过在`start_be.sh`中`JEMALLOC_CONF`增加`,lg_prof_sample:10`然后重启BE,可以减少采样间隔到1K
(2^10 B), 更频繁的采样会使heap profile接近真实内存,但这会带来更大的性能损耗。详细参考
https://linux.die.net/man/3/jemalloc。
-
-如果在做性能测试,尝试删掉`JEMALLOC_CONF`中的`,prof:true`,避免heap profile采样的性能损耗。
-
##### 3. heap dump by JEMALLOC_CONF
通过更改`start_be.sh` 中`JEMALLOC_CONF` 变量后重新启动BE 来进行heap dump
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]