IMPALA-5331: Use new libHDFS API to address "Unknown Error 255"

We use the new libHDFS API hdfsGetLastExceptionRootCause() to retrieve
the root cause of the last HDFS error seen on the calling thread and
append it to our error messages.
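As a rough illustration (not code from this patch) of how the API composes
with the classic errno-based message, consider the sketch below.
DescribeHdfsError() is an illustrative stand-in for Impala's
GetHdfsErrorMsg(), and the ownership of the returned buffer is an
assumption based on the API's thread-local error handling:

  #include <cerrno>
  #include <cstring>
  #include <sstream>
  #include <string>

  #include <hdfs.h>  // declares hdfsGetLastExceptionRootCause()

  // Illustrative helper: combine the errno text with the thread-local
  // root cause reported by libHDFS.
  std::string DescribeHdfsError(const std::string& prefix, const std::string& file) {
    std::stringstream ss;
    ss << prefix << file << "\nError(" << errno << "): " << std::strerror(errno);
    // Returns the root cause of the last exception libHDFS saw on this
    // thread, or nullptr if there is none. The buffer is assumed to be
    // owned by libHDFS, so the caller does not free it.
    char* root_cause = hdfsGetLastExceptionRootCause();
    if (root_cause != nullptr) ss << "\nRoot cause: " << root_cause;
    return ss.str();
  }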
This patch depends on the recent HDFS commit:
https://github.com/apache/hadoop/commit/fda86ef2a32026c02d9b5d4cca1ecb7b4decd872

Testing: A test has been added which puts HDFS in safe mode and then
verifies that we see a 255 error with the root cause.

Change-Id: I181e316ed63b70b94d4f7a7557d398a931bb171d
Reviewed-on: http://gerrit.cloudera.org:8080/6894
Tested-by: Impala Public Jenkins
Reviewed-by: Alex Behm <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/44e8bbff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/44e8bbff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/44e8bbff

Branch: refs/heads/master
Commit: 44e8bbffc33d125f1b357f4605a95bb37946de51
Parents: 6a31d35
Author: Sailesh Mukil <[email protected]>
Authored: Mon May 15 23:30:50 2017 -0700
Committer: Alex Behm <[email protected]>
Committed: Tue May 23 16:42:48 2017 +0000

----------------------------------------------------------------------
 be/src/util/hdfs-bulk-ops.cc          |  2 +-
 be/src/util/hdfs-util.cc              |  4 +++
 tests/data_errors/test_data_errors.py | 39 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/be/src/util/hdfs-bulk-ops.cc
----------------------------------------------------------------------
diff --git a/be/src/util/hdfs-bulk-ops.cc b/be/src/util/hdfs-bulk-ops.cc
index 7b99b79..c5c7ad5 100644
--- a/be/src/util/hdfs-bulk-ops.cc
+++ b/be/src/util/hdfs-bulk-ops.cc
@@ -126,7 +126,7 @@ void HdfsOp::Execute() const {
   if (err == -1 || !connection_status.ok()) {
     string error_msg =
-        connection_status.ok() ? GetStrErrMsg() : connection_status.GetDetail();
+        connection_status.ok() ? GetHdfsErrorMsg("", src_) : connection_status.GetDetail();
     AddError(error_msg);
   }
   op_set_->MarkOneOpDone();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/be/src/util/hdfs-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/hdfs-util.cc b/be/src/util/hdfs-util.cc
index 558fedd..440b68d 100644
--- a/be/src/util/hdfs-util.cc
+++ b/be/src/util/hdfs-util.cc
@@ -31,6 +31,10 @@ string GetHdfsErrorMsg(const string& prefix, const string& file) {
   string error_msg = GetStrErrMsg();
   stringstream ss;
   ss << prefix << file << "\n" << error_msg;
+  char* root_cause = hdfsGetLastExceptionRootCause();
+  if (root_cause != nullptr) {
+    ss << "\nRoot cause: " << root_cause;
+  }
   return ss.str();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/44e8bbff/tests/data_errors/test_data_errors.py
----------------------------------------------------------------------
diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py
index 60d70b7..fa3f189 100644
--- a/tests/data_errors/test_data_errors.py
+++ b/tests/data_errors/test_data_errors.py
@@ -21,6 +21,7 @@
 import pytest
 import random
+import subprocess
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
@@ -64,6 +65,44 @@ class TestHdfsFileOpenFailErrors(ImpalaTestSuite):
     assert "Failed to open HDFS file" in str(e)
     self.client.execute(drop_stmt)
 
+# Test for IMPALA-5331 to verify that the libHDFS API hdfsGetLastExceptionRootCause()
+# works.
+@SkipIf.not_hdfs
+class TestHdfsUnknownErrors(ImpalaTestSuite):
+  @pytest.mark.execute_serially
+  def test_hdfs_safe_mode_error_255(self, unique_database):
+    create_stmt = "create table {0}.safe_mode_fail (x int)".format(unique_database)
+    insert_stmt = "insert into {0}.safe_mode_fail values (1)".format(unique_database)
+    self.execute_query_expect_success(self.client, create_stmt)
+    self.execute_query_expect_success(self.client, insert_stmt)
+    try:
+      # Check that we're not in safe mode.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'get'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't get status of Safe mode. Error: %s" % (error)
+      assert "Safe mode is OFF" in output
+      # Turn safe mode on.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'enter'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't turn Safe mode ON. Error: %s" % (error)
+      assert "Safe mode is ON" in output
+
+      # We shouldn't be able to write to HDFS when it's in safe mode.
+      ex = self.execute_query_expect_failure(self.client, insert_stmt)
+
+      # Confirm that it is an Unknown error with error code 255.
+      assert "Unknown error 255" in str(ex)
+      # Confirm that we were able to get the root cause.
+      assert "Name node is in safe mode" in str(ex)
+    finally:
+      # Leave safe mode.
+      output, error = subprocess.Popen(
+          ['hdfs', 'dfsadmin', '-safemode', 'leave'],
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+      assert error == "", "Couldn't turn Safe mode OFF. Error: %s" % (error)
+      assert "Safe mode is OFF" in output
 
 @SkipIfS3.qualified_path
 class TestHdfsScanNodeErrors(TestDataErrors):
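For reference, a minimal standalone sketch of the failure mode the new test
exercises, written against the libHDFS C API. The connection parameters and
probe path are illustrative, and the cluster is assumed to already be in
safe mode (hdfs dfsadmin -safemode enter):

  #include <fcntl.h>
  #include <cstdio>

  #include <hdfs.h>

  int main() {
    hdfsFS fs = hdfsConnect("default", 0);  // "default" = use fs.defaultFS
    if (fs == nullptr) return 1;
    // Creating a file for write goes through the NameNode and should fail
    // while it is in safe mode.
    hdfsFile f = hdfsOpenFile(fs, "/tmp/safe_mode_probe", O_WRONLY, 0, 0, 0);
    if (f == nullptr) {
      // Previously only errno was available here, which surfaced as
      // "Unknown error 255"; the new API also exposes the Java-side root
      // cause, e.g. "Name node is in safe mode".
      char* root_cause = hdfsGetLastExceptionRootCause();
      std::fprintf(stderr, "open failed, root cause: %s\n",
                   root_cause != nullptr ? root_cause : "<none>");
    } else {
      hdfsCloseFile(fs, f);
    }
    hdfsDisconnect(fs);
    return 0;
  }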
