This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9b3be0ba7a [Fix](multi-catalog) Do not throw exceptions when file not
exists for external hive tables. (#23799)
9b3be0ba7a is described below
commit 9b3be0ba7a6effb0cbf183da2d57f5a3aaf18b12
Author: Xiangyu Wang <[email protected]>
AuthorDate: Sun Sep 10 21:55:09 2023 +0800
[Fix](multi-catalog) Do not throw exceptions when file not exists for
external hive tables. (#23799)
This is a bug similar to #22140.
When executing a query with an HMS catalog, the query may fail because
some HDFS files no longer exist. We should distinguish this kind of error
from other kinds and skip it.
```
errCode = 2, detailMessage =
(xxx.xxx.xxx.xxx)[CANCELLED][INTERNAL_ERROR]failed to init reader for file
hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc,
err: [INTERNAL_ERROR]Init OrcReader failed. reason = Failed to read
hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc:
[INTERNAL_ERROR]Read hdfs file failed. (BE: xxx.xxx.xxx.xxx) [...]
at
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:158)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1927)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:738)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:426)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) at
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) at
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682)
```
---
be/src/io/fs/file_handle_cache.cpp | 2 ++
be/src/io/fs/hdfs_file_reader.cpp | 26 ++++++++++++++++++++++----
be/src/vec/exec/format/orc/vorc_reader.cpp | 8 +++++++-
3 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/be/src/io/fs/file_handle_cache.cpp
b/be/src/io/fs/file_handle_cache.cpp
index 7b31ccce6a..815be0f99b 100644
--- a/be/src/io/fs/file_handle_cache.cpp
+++ b/be/src/io/fs/file_handle_cache.cpp
@@ -42,6 +42,8 @@ Status HdfsFileHandle::init(int64_t file_size) {
_hdfs_file = hdfsOpenFile(_fs, _fname.c_str(), O_RDONLY, 0, 0, 0);
if (_hdfs_file == nullptr) {
std::string _err_msg = hdfs_error();
+ // invoker maybe just skip Status.NotFound and continue
+ // so we need distinguish between it and other kinds of errors
if (_err_msg.find("No such file or directory") != std::string::npos) {
return Status::NotFound(_err_msg);
}
diff --git a/be/src/io/fs/hdfs_file_reader.cpp
b/be/src/io/fs/hdfs_file_reader.cpp
index 6c4f456e37..d344447ae5 100644
--- a/be/src/io/fs/hdfs_file_reader.cpp
+++ b/be/src/io/fs/hdfs_file_reader.cpp
@@ -138,9 +138,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice
result, size_t* bytes_r
tSize loop_read = hdfsPread(_handle->fs(), _handle->file(), offset +
has_read,
to + has_read, bytes_req - has_read);
if (loop_read < 0) {
+ // invoker maybe just skip Status.NotFound and continue
+ // so we need distinguish between it and other kinds of errors
+ std::string _err_msg = hdfs_error();
+ if (_err_msg.find("No such file or directory") !=
std::string::npos) {
+ return Status::NotFound(_err_msg);
+ }
return Status::InternalError(
"Read hdfs file failed. (BE: {}) namenode:{}, path:{},
err: {}",
- BackendOptions::get_localhost(), _name_node,
_path.string(), hdfs_error());
+ BackendOptions::get_localhost(), _name_node,
_path.string(), _err_msg);
}
if (loop_read == 0) {
break;
@@ -153,7 +159,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice
result, size_t* bytes_r
#else
// The hedged read only support hdfsPread().
-// TODO: rethink here to see if there are some difference betwenn hdfsPread()
and hdfsRead()
+// TODO: rethink here to see if there are some difference between hdfsPread()
and hdfsRead()
Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t*
bytes_read,
const IOContext* /*io_ctx*/) {
DCHECK(!closed());
@@ -164,8 +170,14 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice
result, size_t* bytes_r
int res = hdfsSeek(_handle->fs(), _handle->file(), offset);
if (res != 0) {
+ // invoker maybe just skip Status.NotFound and continue
+ // so we need distinguish between it and other kinds of errors
+ std::string _err_msg = hdfs_error();
+ if (_err_msg.find("No such file or directory") != std::string::npos) {
+ return Status::NotFound(_err_msg);
+ }
return Status::InternalError("Seek to offset failed. (BE: {})
offset={}, err: {}",
- BackendOptions::get_localhost(), offset,
hdfs_error());
+ BackendOptions::get_localhost(), offset,
_err_msg);
}
size_t bytes_req = result.size;
@@ -181,9 +193,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice
result, size_t* bytes_r
int64_t loop_read =
hdfsRead(_handle->fs(), _handle->file(), to + has_read,
bytes_req - has_read);
if (loop_read < 0) {
+ // invoker maybe just skip Status.NotFound and continue
+ // so we need distinguish between it and other kinds of errors
+ std::string _err_msg = hdfs_error();
+ if (_err_msg.find("No such file or directory") !=
std::string::npos) {
+ return Status::NotFound(_err_msg);
+ }
return Status::InternalError(
"Read hdfs file failed. (BE: {}) namenode:{}, path:{},
err: {}",
- BackendOptions::get_localhost(), _name_node,
_path.string(), hdfs_error());
+ BackendOptions::get_localhost(), _name_node,
_path.string(), _err_msg);
}
if (loop_read == 0) {
break;
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index e4a1678a37..1e7f51542e 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -236,7 +236,13 @@ Status OrcReader::_create_file_reader() {
_reader = orc::createReader(
std::unique_ptr<ORCFileInputStream>(_file_input_stream.release()), options);
} catch (std::exception& e) {
- return Status::InternalError("Init OrcReader failed. reason = {}",
e.what());
+ // invoker maybe just skip Status.NotFound and continue
+ // so we need distinguish between it and other kinds of errors
+ std::string _err_msg = e.what();
+ if (_err_msg.find("No such file or directory") != std::string::npos) {
+ return Status::NotFound(_err_msg);
+ }
+ return Status::InternalError("Init OrcReader failed. reason = {}",
_err_msg);
}
_remaining_rows = _reader->getNumberOfRows();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]