This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 71a23d01cf2 [opt](file-scanner) add not found file number in profile 
(#37042)
71a23d01cf2 is described below

commit 71a23d01cf25bff5c9f20057430862c7a3c7de00
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Jul 5 13:06:08 2024 +0800

    [opt](file-scanner) add not found file number in profile (#37042)
    
    PR #35319 ignores not-found files in external tables by default.
    This PR adds a BE config `ignore_not_found_file_in_external_table` to
    control this behavior; the default value is still `true`.
    
    It also adds a new metric `NotFoundFileNum`, separate from `EmptyFileNum`,
    to record the number of not-found files in a query.
---
 be/src/common/config.cpp               | 2 ++
 be/src/common/config.h                 | 4 ++++
 be/src/vec/exec/scan/vfile_scanner.cpp | 6 ++++--
 be/src/vec/exec/scan/vfile_scanner.h   | 1 +
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 5ce525022f3..61a460d4084 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1324,6 +1324,8 @@ DEFINE_mInt64(compaction_batch_size, "-1");
 // filter wrong data.
 DEFINE_mBool(enable_parquet_page_index, "true");
 
+DEFINE_mBool(ignore_not_found_file_in_external_table, "true");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index a3b94705dea..b30cd6265f3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1407,6 +1407,10 @@ DECLARE_mInt64(compaction_batch_size);
 
 DECLARE_mBool(enable_parquet_page_index);
 
+// Whether to ignore not-found files in external tables (e.g., Hive).
+// Default is true; if set to false, a not-found file will result in query failure.
+DECLARE_mBool(ignore_not_found_file_in_external_table);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index f6f029b9de0..0c7929f0bff 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -135,6 +135,8 @@ Status VFileScanner::prepare(
     _convert_to_output_block_timer =
             ADD_TIMER(_local_state->scanner_profile(), 
"FileScannerConvertOuputBlockTime");
     _empty_file_counter = ADD_COUNTER(_local_state->scanner_profile(), 
"EmptyFileNum", TUnit::UNIT);
+    _not_found_file_counter =
+            ADD_COUNTER(_local_state->scanner_profile(), "NotFoundFileNum", 
TUnit::UNIT);
     _file_counter = ADD_COUNTER(_local_state->scanner_profile(), "FileNumber", 
TUnit::UNIT);
     _has_fully_rf_file_counter =
             ADD_COUNTER(_local_state->scanner_profile(), 
"HasFullyRfFileNumber", TUnit::UNIT);
@@ -283,9 +285,9 @@ Status VFileScanner::_get_block_wrapped(RuntimeState* 
state, Block* block, bool*
             // And the file may already be removed from storage.
             // Just ignore not found files.
             Status st = _get_next_reader();
-            if (st.is<ErrorCode::NOT_FOUND>()) {
+            if (st.is<ErrorCode::NOT_FOUND>() && 
config::ignore_not_found_file_in_external_table) {
                 _cur_reader_eof = true;
-                COUNTER_UPDATE(_empty_file_counter, 1);
+                COUNTER_UPDATE(_not_found_file_counter, 1);
                 continue;
             } else if (!st) {
                 return st;
diff --git a/be/src/vec/exec/scan/vfile_scanner.h 
b/be/src/vec/exec/scan/vfile_scanner.h
index 332bdfe11e1..fb61c5aa19e 100644
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@@ -178,6 +178,7 @@ private:
     RuntimeProfile::Counter* _pre_filter_timer = nullptr;
     RuntimeProfile::Counter* _convert_to_output_block_timer = nullptr;
     RuntimeProfile::Counter* _empty_file_counter = nullptr;
+    RuntimeProfile::Counter* _not_found_file_counter = nullptr;
     RuntimeProfile::Counter* _file_counter = nullptr;
     RuntimeProfile::Counter* _has_fully_rf_file_counter = nullptr;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to