This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit a06794604f7276c921029514ce23db08ac14a150 Author: Yongqiang YANG <[email protected]> AuthorDate: Sun Sep 17 11:07:05 2023 +0800 [enhancement](disk) refine io error and report bad when disk is abnormal (#24390) --- be/src/io/cache/block/cached_remote_file_reader.cpp | 6 +++--- be/src/olap/tablet.cpp | 5 +++-- be/src/olap/tablet.h | 8 +++++++- be/src/olap/utils.cpp | 8 ++++---- be/src/util/bitmap_value.h | 2 +- be/src/vec/columns/column_array.cpp | 2 +- be/src/vec/columns/column_map.cpp | 2 +- be/src/vec/olap/block_reader.cpp | 4 ++++ be/src/vec/olap/block_reader.h | 6 +++++- be/src/vec/olap/vertical_block_reader.cpp | 4 ++-- be/src/vec/olap/vertical_block_reader.h | 4 ++-- 11 files changed, 33 insertions(+), 18 deletions(-) diff --git a/be/src/io/cache/block/cached_remote_file_reader.cpp b/be/src/io/cache/block/cached_remote_file_reader.cpp index 6e2ecb0aa5f..0b013b2833f 100644 --- a/be/src/io/cache/block/cached_remote_file_reader.cpp +++ b/be/src/io/cache/block/cached_remote_file_reader.cpp @@ -186,7 +186,7 @@ Status CachedRemoteFileReader::_read_from_cache(size_t offset, Slice result, siz break; } if (segment_state != FileBlock::State::DOWNLOADING) { - return Status::IOError( + return Status::InternalError( "File Cache State is {}, the cache downloader encounters an error, " "please " "retry it", @@ -195,7 +195,7 @@ Status CachedRemoteFileReader::_read_from_cache(size_t offset, Slice result, siz } while (++wait_time < MAX_WAIT_TIME); } if (UNLIKELY(wait_time) == MAX_WAIT_TIME) { - return Status::IOError("Waiting too long for the download to complete"); + return Status::InternalError("Waiting too long for the download to complete"); } size_t file_offset = current_offset - left; { @@ -217,7 +217,7 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, size_t* DCHECK(!closed()); DCHECK(io_ctx); if (offset > size()) { - return Status::IOError( + return Status::InvalidArgument( fmt::format("offset exceeds file size(offset: {), file size: {}, path: {})", offset, size(), path().native())); } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index c732b200f77..217619d4145 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1638,9 +1638,10 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info, } if (tablet_state() == TABLET_RUNNING) { - if (has_version_cross || is_io_error_too_times()) { + if (has_version_cross || is_io_error_too_times() || !data_dir()->is_used()) { LOG(INFO) << "report " << full_name() << " as bad, version_cross=" << has_version_cross - << ", ioe times=" << get_io_error_times(); + << ", ioe times=" << get_io_error_times() << ", data_dir used " + << data_dir()->is_used(); tablet_info->__set_used(false); } diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index a1684fe9e65..ba2b1ab587e 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -533,7 +533,13 @@ public: void gc_binlogs(int64_t version); Status ingest_binlog_metas(RowsetBinlogMetasPB* metas_pb); - inline void increase_io_error_times() { ++_io_error_times; } + inline void report_error(const Status& st) { + if (st.is<ErrorCode::IO_ERROR>()) { + ++_io_error_times; + } else if (st.is<ErrorCode::CORRUPTION>()) { + _io_error_times = config::max_tablet_io_errors + 1; + } + } inline int64_t get_io_error_times() const { return _io_error_times; } diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp index 52f9fa8bb85..cdd7ad4c834 100644 --- a/be/src/olap/utils.cpp +++ b/be/src/olap/utils.cpp @@ -428,14 +428,14 @@ Status read_write_test_file(const std::string& test_file_path) { if (access(test_file_path.c_str(), F_OK) == 0) { if (remove(test_file_path.c_str()) != 0) { char errmsg[64]; - return Status::Error<IO_ERROR>("fail to access test file. path={}, errno={}, err={}", - test_file_path, errno, strerror_r(errno, errmsg, 64)); + return Status::IOError("fail to access test file. path={}, errno={}, err={}", + test_file_path, errno, strerror_r(errno, errmsg, 64)); } } else { if (errno != ENOENT) { char errmsg[64]; - return Status::Error<IO_ERROR>("fail to access test file. path={}, errno={}, err={}", - test_file_path, errno, strerror_r(errno, errmsg, 64)); + return Status::IOError("fail to access test file. path={}, errno={}, err={}", + test_file_path, errno, strerror_r(errno, errmsg, 64)); } } diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 041afa7e66d..410f542a99a 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -88,7 +88,7 @@ struct BitmapTypeCode { fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}", BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type); LOG(ERROR) << err_msg; - return Status::IOError(err_msg); + return Status::Corruption(err_msg); } return Status::OK(); } diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 7922b692b9b..c528c54e847 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -819,7 +819,7 @@ Status ColumnArray::filter_by_selector(const uint16_t* sel, size_t sel_size, ICo max_offset = std::max(max_offset, offset_at(sel[i])); } if (max_offset > std::numeric_limits<uint16_t>::max()) { - return Status::IOError("array elements too large than uint16_t::max"); + return Status::Corruption("array elements too large than uint16_t::max"); } to_offsets.reserve(to_offsets.size() + sel_size); diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index cca9415ef03..58a253c52ca 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -416,7 +416,7 @@ Status ColumnMap::filter_by_selector(const uint16_t* sel, size_t sel_size, IColu max_offset = std::max(max_offset, offset_at(sel[i])); } if (max_offset > std::numeric_limits<uint16_t>::max()) { - return Status::IOError("map elements too large than uint16_t::max"); + return Status::Corruption("map elements too large than uint16_t::max"); } to_offsets.reserve(to_offsets.size() + sel_size); diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp index b7a640ab4af..0c303a97444 100644 --- a/be/src/vec/olap/block_reader.cpp +++ b/be/src/vec/olap/block_reader.cpp @@ -214,6 +214,10 @@ Status BlockReader::init(const ReaderParams& read_params) { auto status = _init_collect_iter(read_params); if (!status.ok()) { + if (UNLIKELY(!status.ok() && !status.is<ErrorCode::END_OF_FILE>())) { + _tablet->report_error(status); + } + return status; } diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h index 81844099451..b573e106694 100644 --- a/be/src/vec/olap/block_reader.h +++ b/be/src/vec/olap/block_reader.h @@ -48,7 +48,11 @@ public: Status init(const ReaderParams& read_params) override; Status next_block_with_aggregation(Block* block, bool* eof) override { - return (this->*_next_block_func)(block, eof); + auto res = (this->*_next_block_func)(block, eof); + if (UNLIKELY(!res.ok() && !res.is<ErrorCode::END_OF_FILE>())) { + _tablet->report_error(res); + } + return res; } std::vector<RowLocation> current_block_row_locations() { return _block_row_locations; } diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp index 7d6f9bc5022..493517f1cff 100644 --- a/be/src/vec/olap/vertical_block_reader.cpp +++ b/be/src/vec/olap/vertical_block_reader.cpp @@ -205,8 +205,8 @@ Status VerticalBlockReader::init(const ReaderParams& read_params) { auto status = _init_collect_iter(read_params); if (!status.ok()) { - if (status.is_io_error()) { - _tablet->increase_io_error_times(); + if (UNLIKELY(!status.ok() && !status.is<ErrorCode::END_OF_FILE>())) { + _tablet->report_error(status); } return status; } diff --git a/be/src/vec/olap/vertical_block_reader.h b/be/src/vec/olap/vertical_block_reader.h index e9453601299..2a7ed375928 100644 --- a/be/src/vec/olap/vertical_block_reader.h +++ b/be/src/vec/olap/vertical_block_reader.h @@ -60,8 +60,8 @@ public: Status next_block_with_aggregation(Block* block, bool* eof) override { auto res = (this->*_next_block_func)(block, eof); - if (UNLIKELY(res.is_io_error())) { - _tablet->increase_io_error_times(); + if (UNLIKELY(!res.ok() && !res.is<ErrorCode::END_OF_FILE>())) { + _tablet->report_error(res); } return res; } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
