IMPALA-5331: Use new libHDFS API to address "Unknown Error 255"

We use the new libHDFS API hdfsGetLastExceptionRootCause() to retrieve
the root cause of the last HDFS error seen on the calling thread.
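For illustration, a minimal sketch of how a caller might use the new API
after a failed libHDFS call. The function name, hdfsFS handle, and path
below are placeholders rather than code from this patch:

  #include <hdfs.h>
  #include <cstdio>

  void ReportDeleteFailure(hdfsFS fs) {
    // hdfsDelete() returns 0 on success and -1 on error.
    if (hdfsDelete(fs, "/tmp/example-file", /*recursive=*/0) != 0) {
      // Root cause of the last exception seen on this thread, or NULL if
      // none was recorded. The string is thread-local; do not free it.
      char* root_cause = hdfsGetLastExceptionRootCause();
      if (root_cause != nullptr) {
        fprintf(stderr, "Delete failed. Root cause: %s\n", root_cause);
      } else {
        fprintf(stderr, "Delete failed with no recorded root cause.\n");
      }
    }
  }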

This patch depends on the recent HDFS commit:
https://github.com/apache/hadoop/commit/fda86ef2a32026c02d9b5d4cca1ecb7b4decd872

Testing: Added a test that puts HDFS in safe mode and verifies that a
write fails with "Unknown error 255" and that the error message includes
the root cause.
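The test drives safe mode with "hdfs dfsadmin -safemode enter" and
"-safemode leave" (see the test diff below), which is why it must run
serially.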

Change-Id: I181e316ed63b70b94d4f7a7557d398a931bb171d
Reviewed-on: http://gerrit.cloudera.org:8080/6894
Tested-by: Impala Public Jenkins
Reviewed-by: Alex Behm <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/44e8bbff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/44e8bbff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/44e8bbff

Branch: refs/heads/master
Commit: 44e8bbffc33d125f1b357f4605a95bb37946de51
Parents: 6a31d35
Author: Sailesh Mukil <[email protected]>
Authored: Mon May 15 23:30:50 2017 -0700
Committer: Alex Behm <[email protected]>
Committed: Tue May 23 16:42:48 2017 +0000

----------------------------------------------------------------------
 be/src/util/hdfs-bulk-ops.cc          |  2 +-
 be/src/util/hdfs-util.cc              |  4 +++
 tests/data_errors/test_data_errors.py | 39 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/be/src/util/hdfs-bulk-ops.cc
----------------------------------------------------------------------
diff --git a/be/src/util/hdfs-bulk-ops.cc b/be/src/util/hdfs-bulk-ops.cc
index 7b99b79..c5c7ad5 100644
--- a/be/src/util/hdfs-bulk-ops.cc
+++ b/be/src/util/hdfs-bulk-ops.cc
@@ -126,7 +126,7 @@ void HdfsOp::Execute() const {
 
   if (err == -1 || !connection_status.ok()) {
     string error_msg =
-        connection_status.ok() ? GetStrErrMsg() : connection_status.GetDetail();
+        connection_status.ok() ? GetHdfsErrorMsg("", src_) : connection_status.GetDetail();
     AddError(error_msg);
   }
   op_set_->MarkOneOpDone();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/be/src/util/hdfs-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/hdfs-util.cc b/be/src/util/hdfs-util.cc
index 558fedd..440b68d 100644
--- a/be/src/util/hdfs-util.cc
+++ b/be/src/util/hdfs-util.cc
@@ -31,6 +31,10 @@ string GetHdfsErrorMsg(const string& prefix, const string& file) {
   string error_msg = GetStrErrMsg();
   stringstream ss;
   ss << prefix << file << "\n" << error_msg;
+  char* root_cause = hdfsGetLastExceptionRootCause();
+  if (root_cause != nullptr) {
+    ss << "\nRoot cause: " << root_cause;
+  }
   return ss.str();
 }
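Note on the hunk above: root_cause is intentionally not freed. Per the
HDFS commit this patch depends on, hdfsGetLastExceptionRootCause()
returns a pointer to thread-local storage that the caller must not free.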
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/tests/data_errors/test_data_errors.py
----------------------------------------------------------------------
diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py
index 60d70b7..fa3f189 100644
--- a/tests/data_errors/test_data_errors.py
+++ b/tests/data_errors/test_data_errors.py
@@ -21,6 +21,7 @@
 
 import pytest
 import random
+import subprocess
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
@@ -64,6 +65,44 @@ class TestHdfsFileOpenFailErrors(ImpalaTestSuite):
       assert "Failed to open HDFS file" in str(e)
     self.client.execute(drop_stmt)
 
+# Test for IMPALA-5331 to verify that the libHDFS API hdfsGetLastExceptionRootCause()
+# works.
[email protected]_hdfs
+class TestHdfsUnknownErrors(ImpalaTestSuite):
+  @pytest.mark.execute_serially
+  def test_hdfs_safe_mode_error_255(self, unique_database):
+    create_stmt = "create table {0}.safe_mode_fail (x int)".format(unique_database)
+    insert_stmt = "insert into {0}.safe_mode_fail values (1)".format(unique_database)
+    self.execute_query_expect_success(self.client, create_stmt)
+    self.execute_query_expect_success(self.client, insert_stmt)
+    try:
+      # Check that we're not in safe mode.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'get'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't get status of Safe mode. Error: %s" % error
+      assert "Safe mode is OFF" in output
+      # Turn safe mode on.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'enter'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't turn Safe mode ON. Error: %s" % error
+      assert "Safe mode is ON" in output
+
+      # We shouldn't be able to write to HDFS when it's in safe mode.
+      ex = self.execute_query_expect_failure(self.client, insert_stmt)
+
+      # Confirm that it is an Unknown error with error code 255.
+      assert "Unknown error 255" in str(ex)
+      # Confirm that we were able to get the root cause.
+      assert "Name node is in safe mode" in str(ex)
+    finally:
+      # Leave safe mode.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'leave'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't turn Safe mode OFF. Error: %s" % error
+      assert "Safe mode is OFF" in output
 
 @SkipIfS3.qualified_path
 class TestHdfsScanNodeErrors(TestDataErrors):
