This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e0d528980f [fix](multi catalog) Return empty block while external table
scanner couldn't find the file (#14997)
e0d528980f is described below
commit e0d528980f1987dd742aa104f4728031703007de
Author: Jibing-Li <[email protected]>
AuthorDate: Fri Dec 16 09:36:35 2022 +0800
[fix](multi catalog) Return empty block while external table scanner
couldn't find the file (#14997)
The FE file path cache for an external table may be out of date. In this case,
the BE may fail to find a file that is listed in the FE cache but no longer exists.
This PR handles this case: instead of returning an error message to the
user, we return an empty result set.
---
be/src/io/hdfs_file_reader.cpp | 3 +++
be/src/vec/exec/scan/scanner_scheduler.cpp | 12 +++++++++++-
be/src/vec/exec/scan/vfile_scanner.cpp | 3 +++
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/be/src/io/hdfs_file_reader.cpp b/be/src/io/hdfs_file_reader.cpp
index 8c0e77e136..ba26154388 100644
--- a/be/src/io/hdfs_file_reader.cpp
+++ b/be/src/io/hdfs_file_reader.cpp
@@ -72,6 +72,9 @@ Status HdfsFileReader::open() {
RETURN_IF_ERROR(HdfsFsCache::instance()->get_connection(_hdfs_params,
&_fs_handle));
_hdfs_fs = _fs_handle->hdfs_fs;
+ if (hdfsExists(_hdfs_fs, _path.c_str()) != 0) {
+ return Status::NotFound("{} not exists!", _path);
+ }
_hdfs_file = hdfsOpenFile(_hdfs_fs, _path.c_str(), O_RDONLY, 0, 0, 0);
if (_hdfs_file == nullptr) {
if (_fs_handle->from_cache) {
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp
b/be/src/vec/exec/scan/scanner_scheduler.cpp
index f2170a9599..d45adc13b9 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -26,6 +26,7 @@
#include "vec/core/block.h"
#include "vec/exec/scan/vscanner.h"
#include "vec/exprs/vexpr.h"
+#include "vfile_scanner.h"
namespace doris::vectorized {
@@ -232,12 +233,21 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler*
scheduler, ScannerContext
auto block = ctx->get_free_block(&get_free_block);
status = scanner->get_block(state, block, &eos);
VLOG_ROW << "VOlapScanNode input rows: " << block->rows() << ", eos: "
<< eos;
- if (!status.ok()) {
+ // The VFileScanner for external table may try to open not exist files,
+ // Because FE file cache for external table may out of date.
+ if (!status.ok() && (typeid(*scanner) ==
typeid(doris::vectorized::VFileScanner) &&
+ !status.is<ErrorCode::NOT_FOUND>())) {
LOG(WARNING) << "Scan thread read VOlapScanner failed: " <<
status.to_string();
// Add block ptr in blocks, prevent mem leak in read failed
blocks.push_back(block);
break;
}
+ if (status.is<ErrorCode::NOT_FOUND>()) {
+ // The only case in this if branch is external table file delete
and fe cache has not been updated yet.
+ // Set status to OK.
+ status = Status::OK();
+ eos = true;
+ }
raw_bytes_read += block->bytes();
num_rows_in_block += block->rows();
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 1d67056920..1ad6928799 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -531,6 +531,9 @@ Status VFileScanner::_get_next_reader() {
if (init_status.is<END_OF_FILE>()) {
continue;
} else if (!init_status.ok()) {
+ if (init_status.is<ErrorCode::NOT_FOUND>()) {
+ return init_status;
+ }
return Status::InternalError("failed to init reader for file {},
err: {}", range.path,
init_status.to_string());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]