This is an automated email from the ASF dual-hosted git repository. lv pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0b7c9645451e6c2d30e225a27a89a901df940b09 Author: Philip Zeyliger <phi...@cloudera.com> AuthorDate: Thu Feb 7 15:31:19 2019 -0800 Adding hostname to Disk I/O errors. I recently ran into some queries that failed like so: WARNINGS: Disk I/O error: Could not open file: /data/...: Error(5): Input/output error These warnings were in the profile, but I had to cross-reference impalad logs to figure out which machine had the broken disk. In this commit, I've sprinkled GetBackendString() to include it. Change-Id: Ib977d2c0983ef81ab1338de090239ed57f3efde2 Reviewed-on: http://gerrit.cloudera.org:8080/12402 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/exprs/timezone_db.cc | 3 ++- be/src/runtime/io/disk-io-mgr-test.cc | 10 ++++++---- be/src/runtime/io/disk-io-mgr.cc | 8 ++++---- be/src/runtime/io/error-converter.cc | 4 ++-- be/src/runtime/io/hdfs-file-reader.cc | 8 ++++---- be/src/runtime/io/hdfs-monitored-ops.cc | 2 +- be/src/runtime/io/local-file-reader.cc | 6 +++--- common/thrift/generate_error_codes.py | 2 +- 8 files changed, 23 insertions(+), 20 deletions(-) diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc index 577dea2..ad50e6b 100644 --- a/be/src/exprs/timezone_db.cc +++ b/be/src/exprs/timezone_db.cc @@ -30,6 +30,7 @@ #include "gutil/strings/ascii_ctype.h" #include "gutil/strings/substitute.h" #include "runtime/hdfs-fs-cache.h" +#include "util/debug-util.h" #include "util/filesystem-util.h" #include "util/hdfs-util.h" #include "util/string-parser.h" @@ -387,7 +388,7 @@ Status TimezoneDatabase::LoadZoneAliasesFromHdfs(const string& hdfs_zone_alias_c current_bytes_read = hdfsRead(hdfs_conn, hdfs_file, buffer.data(), buffer.size()); if (current_bytes_read == 0) break; if (current_bytes_read < 0) { - status = Status(TErrorCode::DISK_IO_ERROR, + status = Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), GetHdfsErrorMsg("Error reading from HDFS file: ", hdfs_zone_alias_conf)); break; } diff --git a/be/src/runtime/io/disk-io-mgr-test.cc b/be/src/runtime/io/disk-io-mgr-test.cc index 057713a..a8e07b8 100644 --- a/be/src/runtime/io/disk-io-mgr-test.cc +++ b/be/src/runtime/io/disk-io-mgr-test.cc @@ -360,8 +360,9 @@ TEST_F(DiskIoMgrTest, InvalidWrite) { WriteRange::WriteDoneCallback callback = bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes, new_range, nullptr, nullptr, nullptr, data, - Status(TErrorCode::DISK_IO_ERROR, "open() failed for /non-existent/file.txt. " - "The given path doesn't exist. errno=2"), _1); + Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), + "open() failed for /non-existent/file.txt. " + "The given path doesn't exist. errno=2"), _1); *new_range = pool_.Add(new WriteRange(tmp_file, rand(), 0, callback)); (*new_range)->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t)); @@ -378,7 +379,8 @@ TEST_F(DiskIoMgrTest, InvalidWrite) { new_range = pool_.Add(new WriteRange*); callback = bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes, new_range, nullptr, nullptr, nullptr, data, - Status(TErrorCode::DISK_IO_ERROR, "fseek() failed for /tmp/disk_io_mgr_test.txt. " + Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), + "fseek() failed for /tmp/disk_io_mgr_test.txt. " "Invalid inputs. errno=22, offset=-1"), _1); *new_range = pool_.Add(new WriteRange(tmp_file, -1, 0, callback)); @@ -466,7 +468,7 @@ void DiskIoMgrTest::AddWriteRange(int num_of_writes, int32_t* data, WriteRange::WriteDoneCallback callback = bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes, nullptr, nullptr, nullptr, nullptr, data, - Status(TErrorCode::DISK_IO_ERROR, expected_output), _1); + Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), expected_output), _1); WriteRange* write_range = pool_.Add(new WriteRange(file_name, offset, 0, callback)); write_range->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t)); EXPECT_OK(writer->AddWriteRange(write_range)); diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc index ce56be0..394745f 100644 --- a/be/src/runtime/io/disk-io-mgr.cc +++ b/be/src/runtime/io/disk-io-mgr.cc @@ -293,15 +293,15 @@ void DiskIoMgr::UnregisterContext(RequestContext* reader) { Status DiskIoMgr::ValidateScanRange(ScanRange* range) { int disk_id = range->disk_id(); if (disk_id < 0 || disk_id >= disk_queues_.size()) { - return Status(TErrorCode::DISK_IO_ERROR, - Substitute("Invalid scan range. Bad disk id: $0", disk_id)); + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), + Substitute("Invalid scan range. Bad disk id: $0", disk_id)); } if (range->offset() < 0) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Invalid scan range. Negative offset $0", range->offset())); } if (range->len() <= 0) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Invalid scan range. Non-positive length $0", range->len())); } return Status::OK(); diff --git a/be/src/runtime/io/error-converter.cc b/be/src/runtime/io/error-converter.cc index 966558d..547f086 100644 --- a/be/src/runtime/io/error-converter.cc +++ b/be/src/runtime/io/error-converter.cc @@ -49,8 +49,8 @@ unordered_map<int, string> ErrorConverter::errno_to_error_text_map_( Status ErrorConverter::GetErrorStatusFromErrno(const string& function_name, const string& file_path, int err_no, const Params& params) { - return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetErrorText(function_name, - file_path, err_no, params))); + return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetBackendString(), + GetErrorText(function_name, file_path, err_no, params))); } string ErrorConverter::GetErrorText(const string& function_name, diff --git a/be/src/runtime/io/hdfs-file-reader.cc b/be/src/runtime/io/hdfs-file-reader.cc index b495d61..0bbf984 100644 --- a/be/src/runtime/io/hdfs-file-reader.cc +++ b/be/src/runtime/io/hdfs-file-reader.cc @@ -59,7 +59,7 @@ Status HdfsFileReader::Open(bool use_file_handle_cache) { if (hdfsSeek(hdfs_fs_, exclusive_hdfs_fh_->file(), scan_range_->offset_) != 0) { // Destroy the file handle io_mgr->ReleaseExclusiveHdfsFileHandle(std::move(exclusive_hdfs_fh_)); - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Error seeking to $0 in file: $1 $2", scan_range_->offset(), *scan_range_->file_string(), GetHdfsErrorMsg(""))); } @@ -165,7 +165,7 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_ if (FLAGS_use_hdfs_pread) { *bytes_read = hdfsPread(hdfs_fs_, hdfs_file, position_in_file, buffer, chunk_size); if (*bytes_read == -1) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), GetHdfsErrorMsg("Error reading from HDFS file: ", *scan_range_->file_string())); } @@ -174,14 +174,14 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_ // location. Seek to the appropriate location. if (is_borrowed_fh) { if (hdfsSeek(hdfs_fs_, hdfs_file, position_in_file) != 0) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Error seeking to $0 in file: $1: $2", position_in_file, *scan_range_->file_string(), GetHdfsErrorMsg(""))); } } *bytes_read = hdfsRead(hdfs_fs_, hdfs_file, buffer, chunk_size); if (*bytes_read == -1) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), GetHdfsErrorMsg("Error reading from HDFS file: ", *scan_range_->file_string())); } diff --git a/be/src/runtime/io/hdfs-monitored-ops.cc b/be/src/runtime/io/hdfs-monitored-ops.cc index 3ea5c86..f864b7c 100644 --- a/be/src/runtime/io/hdfs-monitored-ops.cc +++ b/be/src/runtime/io/hdfs-monitored-ops.cc @@ -69,7 +69,7 @@ Status OpenHdfsFileOp::Execute() { if (hdfs_file_ == nullptr) { // GetHdfsErrorMsg references thread local state to get error information, so it // must happen in the same thread as the hdfsOpenFile(). - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), GetHdfsErrorMsg("Failed to open HDFS file ", fname_)); } return Status::OK(); diff --git a/be/src/runtime/io/local-file-reader.cc b/be/src/runtime/io/local-file-reader.cc index 3f88106..9be45a9 100644 --- a/be/src/runtime/io/local-file-reader.cc +++ b/be/src/runtime/io/local-file-reader.cc @@ -40,7 +40,7 @@ Status LocalFileReader::Open(bool use_file_handle_cache) { file_ = fopen(scan_range_->file(), "r"); if (file_ == nullptr) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Could not open file: $0: $1", *scan_range_->file_string(), GetStrErrMsg())); } @@ -68,7 +68,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer, if (fseek(file_, file_offset, SEEK_SET) == -1) { fclose(file_); file_ = nullptr; - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Could not seek to $0 " "for file: $1: $2", scan_range_->offset(), *scan_range_->file_string(), GetStrErrMsg())); @@ -78,7 +78,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer, DCHECK_LE(*bytes_read, bytes_to_read); if (*bytes_read < bytes_to_read) { if (ferror(file_) != 0) { - return Status(TErrorCode::DISK_IO_ERROR, + return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), Substitute("Error reading from $0" "at byte offset: $1: $2", file_, file_offset, GetStrErrMsg())); diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py index 93fed7d..10f7d4d 100755 --- a/common/thrift/generate_error_codes.py +++ b/common/thrift/generate_error_codes.py @@ -335,7 +335,7 @@ error_codes = ( ("THREAD_CREATION_FAILED", 109, "Failed to create thread $0 in category $1: $2"), - ("DISK_IO_ERROR", 110, "Disk I/O error: $0"), + ("DISK_IO_ERROR", 110, "Disk I/O error on $0: $1"), ("DATASTREAM_RECVR_CLOSED", 111, "DataStreamRecvr for fragment=$0, node=$1 is closed already"),