This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 71a23d01cf2 [opt](file-scanner) add not found file number in profile
(#37042)
71a23d01cf2 is described below
commit 71a23d01cf25bff5c9f20057430862c7a3c7de00
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Jul 5 13:06:08 2024 +0800
[opt](file-scanner) add not found file number in profile (#37042)
PR #35319 ignores not-found files in external tables by default.
This PR adds a BE config `ignore_not_found_file_in_external_table` to
control this behavior;
the default value is still `true`.
It also adds a new profile metric `NotFoundFileNum`, separate from `EmptyFileNum`,
to record the number of not-found files in a query.
---
be/src/common/config.cpp | 2 ++
be/src/common/config.h | 4 ++++
be/src/vec/exec/scan/vfile_scanner.cpp | 6 ++++--
be/src/vec/exec/scan/vfile_scanner.h | 1 +
4 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 5ce525022f3..61a460d4084 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1324,6 +1324,8 @@ DEFINE_mInt64(compaction_batch_size, "-1");
// filter wrong data.
DEFINE_mBool(enable_parquet_page_index, "true");
+DEFINE_mBool(ignore_not_found_file_in_external_table, "true");
+
// clang-format off
#ifdef BE_TEST
// test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index a3b94705dea..b30cd6265f3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1407,6 +1407,10 @@ DECLARE_mInt64(compaction_batch_size);
DECLARE_mBool(enable_parquet_page_index);
+// Whether to ignore not-found files in external tables (e.g., Hive).
+// Default is true; if set to false, a not-found file will result in query failure.
+DECLARE_mBool(ignore_not_found_file_in_external_table);
+
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index f6f029b9de0..0c7929f0bff 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -135,6 +135,8 @@ Status VFileScanner::prepare(
_convert_to_output_block_timer =
ADD_TIMER(_local_state->scanner_profile(),
"FileScannerConvertOuputBlockTime");
_empty_file_counter = ADD_COUNTER(_local_state->scanner_profile(),
"EmptyFileNum", TUnit::UNIT);
+ _not_found_file_counter =
+ ADD_COUNTER(_local_state->scanner_profile(), "NotFoundFileNum",
TUnit::UNIT);
_file_counter = ADD_COUNTER(_local_state->scanner_profile(), "FileNumber",
TUnit::UNIT);
_has_fully_rf_file_counter =
ADD_COUNTER(_local_state->scanner_profile(),
"HasFullyRfFileNumber", TUnit::UNIT);
@@ -283,9 +285,9 @@ Status VFileScanner::_get_block_wrapped(RuntimeState*
state, Block* block, bool*
// And the file may already be removed from storage.
// Just ignore not found files.
Status st = _get_next_reader();
- if (st.is<ErrorCode::NOT_FOUND>()) {
+ if (st.is<ErrorCode::NOT_FOUND>() &&
config::ignore_not_found_file_in_external_table) {
_cur_reader_eof = true;
- COUNTER_UPDATE(_empty_file_counter, 1);
+ COUNTER_UPDATE(_not_found_file_counter, 1);
continue;
} else if (!st) {
return st;
diff --git a/be/src/vec/exec/scan/vfile_scanner.h
b/be/src/vec/exec/scan/vfile_scanner.h
index 332bdfe11e1..fb61c5aa19e 100644
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@@ -178,6 +178,7 @@ private:
RuntimeProfile::Counter* _pre_filter_timer = nullptr;
RuntimeProfile::Counter* _convert_to_output_block_timer = nullptr;
RuntimeProfile::Counter* _empty_file_counter = nullptr;
+ RuntimeProfile::Counter* _not_found_file_counter = nullptr;
RuntimeProfile::Counter* _file_counter = nullptr;
RuntimeProfile::Counter* _has_fully_rf_file_counter = nullptr;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]