This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 73404405e68 [fix](hdfs) Fix hdfsExists that return staled root cause 
(#27991)
73404405e68 is described below

commit 73404405e68f92b9363a03f78854c78f985f01d4
Author: walter <[email protected]>
AuthorDate: Wed Dec 6 08:24:13 2023 +0800

    [fix](hdfs) Fix hdfsExists that return staled root cause (#27991)
    
    The HDFS native client does not clear the last exception as expected, so
`hdfsGetLastExceptionRootCause` might return a stale root cause. This PR saves
the previous root cause before calling hdfsExists and, when hdfsExists returns a
non-zero code, reports an error only if the root cause differs from the saved one.
---
 be/src/io/fs/hdfs_file_system.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/be/src/io/fs/hdfs_file_system.cpp 
b/be/src/io/fs/hdfs_file_system.cpp
index 1e71ade9344..7e0062d2430 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -227,15 +227,28 @@ Status HdfsFileSystem::delete_internal(const Path& path, 
int is_recursive) {
 Status HdfsFileSystem::exists_impl(const Path& path, bool* res) const {
     CHECK_HDFS_HANDLE(_fs_handle);
     Path real_path = convert_path(path, _fs_name);
+#ifdef USE_HADOOP_HDFS
+    // HACK: the HDFS native client won't clear the last exception as expected 
so
+    // `hdfsGetLastExceptionRootCause` might return a staled root cause. Save 
the
+    // last root cause here and verify after hdfsExists returns a non-zero 
code.
+    //
+    // See details:
+    //  
https://github.com/apache/hadoop/blob/5cda162a804fb0cfc2a5ac0058ab407662c5fb00/
+    //  
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c#L795
+    char* former_root_cause = hdfsGetLastExceptionRootCause();
+#endif
     int is_exists = hdfsExists(_fs_handle->hdfs_fs, 
real_path.string().c_str());
 #ifdef USE_HADOOP_HDFS
     // when calling hdfsExists() and return non-zero code,
     // if root_cause is nullptr, which means the file does not exist.
     // if root_cause is not nullptr, which means it encounter other error, 
should return.
     // NOTE: not for libhdfs3 since it only runs on MaxOS, don't have to 
support it.
-    char* root_cause = hdfsGetLastExceptionRootCause();
-    if (root_cause != nullptr) {
-        return Status::IOError("failed to check path existence {}: {}", 
path.native(), root_cause);
+    if (is_exists != 0) {
+        char* root_cause = hdfsGetLastExceptionRootCause();
+        if (root_cause != nullptr && root_cause != former_root_cause) {
+            return Status::IOError("failed to check path existence {}: {}", 
path.native(),
+                                   root_cause);
+        }
     }
 #endif
     *res = (is_exists == 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to