This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8eeb435963 [improvement](meta) Enhance Doris's fault tolerance to
disk error (#16472)
8eeb435963 is described below
commit 8eeb43596329f66981f47cc8246279aeb62c33b4
Author: Lijia Liu <[email protected]>
AuthorDate: Thu Feb 23 08:40:45 2023 +0800
[improvement](meta) Enhance Doris's fault tolerance to disk error (#16472)
Sense io errors.
Retry the query when an io error occurs.
Greylist: when one disk is found to be completely broken, or the difference between the
tablet counts in the BE and FE meta is too large, reduce the query priority of the BE.
---
be/src/common/config.h | 4 +++
be/src/common/status.h | 5 +---
be/src/olap/data_dir.h | 1 -
be/src/olap/file_helper.cpp | 12 ++------
be/src/olap/rowset/beta_rowset.cpp | 32 ++++------------------
be/src/olap/rowset/beta_rowset_reader.cpp | 16 ++++-------
be/src/olap/rowset/beta_rowset_writer.cpp | 2 +-
be/src/olap/storage_engine.cpp | 11 --------
be/src/olap/storage_engine.h | 3 --
be/src/olap/tablet.cpp | 4 ++-
be/src/olap/tablet.h | 10 +++++++
be/src/olap/tablet_meta.cpp | 12 +++++---
be/src/vec/core/block.h | 2 ++
be/src/vec/olap/vertical_block_reader.cpp | 3 ++
be/src/vec/olap/vertical_block_reader.h | 6 +++-
be/test/olap/tablet_test.cpp | 6 ++--
.../maint-monitor/be-olap-error-code.md | 3 --
.../maint-monitor/be-olap-error-code.md | 3 --
18 files changed, 55 insertions(+), 80 deletions(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index f15d30bc7d..d3e5ecd2d2 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -924,6 +924,10 @@ CONF_Int32(max_depth_in_bkd_tree, "32");
CONF_Int32(num_broadcast_buffer, "32");
// semi-structure configs
CONF_Bool(enable_parse_multi_dimession_array, "true");
+
+// Report a tablet as bad once its io error count reaches this value; a value <= 0 disables the check.
+CONF_mInt64(max_tablet_io_errors, "-1");
+
#ifdef BE_TEST
// test s3
CONF_String(test_s3_resource, "resource");
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 25bece2588..295e21b77c 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -177,7 +177,6 @@ E(WRITER_ROW_BLOCK_ERROR, -1202);
E(WRITER_SEGMENT_NOT_FINALIZED, -1203);
E(ROWBLOCK_DECOMPRESS_ERROR, -1300);
E(ROWBLOCK_FIND_ROW_EXCEPTION, -1301);
-E(ROWBLOCK_READ_INFO_ERROR, -1302);
E(HEADER_ADD_VERSION, -1400);
E(HEADER_DELETE_VERSION, -1401);
E(HEADER_ADD_PENDING_DELTA, -1402);
@@ -236,9 +235,7 @@ E(ROWSET_TYPE_NOT_FOUND, -3105);
E(ROWSET_ALREADY_EXIST, -3106);
E(ROWSET_CREATE_READER, -3107);
E(ROWSET_INVALID, -3108);
-E(ROWSET_LOAD_FAILED, -3109);
E(ROWSET_READER_INIT, -3110);
-E(ROWSET_READ_FAILED, -3111);
E(ROWSET_INVALID_STATE_TRANSITION, -3112);
E(STRING_OVERFLOW_IN_VEC_ENGINE, -3113);
E(ROWSET_ADD_MIGRATION_V2, -3114);
@@ -408,7 +405,7 @@ public:
bool is_io_error() const {
return ErrorCode::IO_ERROR == _code || ErrorCode::READ_UNENOUGH ==
_code ||
ErrorCode::CHECKSUM_ERROR == _code ||
ErrorCode::FILE_DATA_ERROR == _code ||
- ErrorCode::TEST_FILE_ERROR == _code ||
ErrorCode::ROWBLOCK_READ_INFO_ERROR == _code;
+ ErrorCode::TEST_FILE_ERROR == _code;
}
bool is_invalid_argument() const { return ErrorCode::INVALID_ARGUMENT ==
_code; }
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 20b23c1405..11e1005450 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -58,7 +58,6 @@ public:
const io::FileSystemSPtr& fs() const { return _fs; }
bool is_used() const { return _is_used; }
- void set_is_used(bool is_used) { _is_used = is_used; }
int32_t cluster_id() const { return _cluster_id; }
bool cluster_id_incomplete() const { return _cluster_id_incomplete; }
diff --git a/be/src/olap/file_helper.cpp b/be/src/olap/file_helper.cpp
index fa1bcb576c..a1779484f8 100644
--- a/be/src/olap/file_helper.cpp
+++ b/be/src/olap/file_helper.cpp
@@ -47,9 +47,7 @@ Status FileHandler::open(const string& file_name, int flag) {
return Status::OK();
}
- if (!this->close()) {
- return Status::Error<IO_ERROR>();
- }
+ RETURN_IF_ERROR(this->close());
_fd = ::open(file_name.c_str(), flag);
@@ -74,9 +72,7 @@ Status FileHandler::open_with_mode(const string& file_name,
int flag, int mode)
return Status::OK();
}
- if (!this->close()) {
- return Status::Error<IO_ERROR>();
- }
+ RETURN_IF_ERROR(this->close());
_fd = ::open(file_name.c_str(), flag, mode);
@@ -249,9 +245,7 @@ Status FileHandlerWithBuf::open(const string& file_name,
const char* mode) {
return Status::OK();
}
- if (!this->close()) {
- return Status::Error<IO_ERROR>();
- }
+ RETURN_IF_ERROR(this->close());
_fp = ::fopen(file_name.c_str(), mode);
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index db296fbd16..5f881dbd20 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -134,38 +134,18 @@ Status BetaRowset::get_segments_size(std::vector<size_t>*
segments_size) {
return Status::OK();
}
Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>*
segments) {
- auto fs = _rowset_meta->fs();
- if (!fs || _schema == nullptr) {
- return Status::Error<INIT_FAILED>();
- }
- for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
- auto seg_path = segment_file_path(seg_id);
- std::shared_ptr<segment_v2::Segment> segment;
- io::SegmentCachePathPolicy cache_policy;
- cache_policy.set_cache_path(segment_cache_path(seg_id));
- io::FileReaderOptions
reader_options(io::cache_type_from_string(config::file_cache_type),
- cache_policy);
- auto s = segment_v2::Segment::open(fs, seg_path, seg_id, rowset_id(),
_schema,
- reader_options, &segment);
- if (!s.ok()) {
- LOG(WARNING) << "failed to open segment. " << seg_path << " under
rowset "
- << unique_id() << " : " << s.to_string();
- return Status::Error<ROWSET_LOAD_FAILED>();
- }
- segments->push_back(std::move(segment));
- }
- return Status::OK();
+ return load_segments(0, num_segments(), segments);
}
Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,
std::vector<segment_v2::SegmentSharedPtr>*
segments) {
+ auto fs = _rowset_meta->fs();
+ if (!fs || _schema == nullptr) {
+ return Status::Error<INIT_FAILED>();
+ }
int64_t seg_id = seg_id_begin;
while (seg_id < seg_id_end) {
DCHECK(seg_id >= 0);
- auto fs = _rowset_meta->fs();
- if (!fs || _schema == nullptr) {
- return Status::Error<INIT_FAILED>();
- }
auto seg_path = segment_file_path(seg_id);
std::shared_ptr<segment_v2::Segment> segment;
io::SegmentCachePathPolicy cache_policy;
@@ -177,7 +157,7 @@ Status BetaRowset::load_segments(int64_t seg_id_begin,
int64_t seg_id_end,
if (!s.ok()) {
LOG(WARNING) << "failed to open segment. " << seg_path << " under
rowset "
<< unique_id() << " : " << s.to_string();
- return Status::Error<ROWSET_LOAD_FAILED>();
+ return s;
}
segments->push_back(std::move(segment));
seg_id++;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index b8bb29698e..fe7eea0bde 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -237,14 +237,12 @@ Status BetaRowsetReader::next_block(vectorized::Block*
block) {
do {
auto s = _iterator->next_batch(block);
if (!s.ok()) {
- if (s.is<END_OF_FILE>()) {
- return Status::Error<END_OF_FILE>();
- } else {
+ if (!s.is<END_OF_FILE>()) {
LOG(WARNING) << "failed to read next block: " << s.to_string();
- return Status::Error<ROWSET_READ_FAILED>();
}
+ return s;
}
- } while (block->rows() == 0);
+ } while (block->empty());
return Status::OK();
}
@@ -255,12 +253,10 @@ Status
BetaRowsetReader::next_block_view(vectorized::BlockView* block_view) {
do {
auto s = _iterator->next_block_view(block_view);
if (!s.ok()) {
- if (s.is<END_OF_FILE>()) {
- return Status::Error<END_OF_FILE>();
- } else {
- LOG(WARNING) << "failed to read next block: " <<
s.to_string();
- return Status::Error<ROWSET_READ_FAILED>();
+ if (!s.is<END_OF_FILE>()) {
+ LOG(WARNING) << "failed to read next block view: " <<
s.to_string();
}
+ return s;
}
} while (block_view->empty());
} else {
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index f4895e74fe..7eba29a083 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -382,7 +382,7 @@ Status BetaRowsetWriter::_load_noncompacted_segments(
_context.tablet_schema,
reader_options, &segment);
if (!s.ok()) {
LOG(WARNING) << "failed to open segment. " << seg_path << ":" <<
s.to_string();
- return Status::Error<ROWSET_LOAD_FAILED>();
+ return s;
}
segments->push_back(std::move(segment));
}
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index cea5856dbb..089242dbd1 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -285,17 +285,6 @@ Status
StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id)
return Status::OK();
}
-void StorageEngine::set_store_used_flag(const string& path, bool is_used) {
- std::lock_guard<std::mutex> l(_store_lock);
- auto it = _store_map.find(path);
- if (it == _store_map.end()) {
- LOG(WARNING) << "store not exist, path=" << path;
- }
-
- it->second->set_is_used(is_used);
- _update_storage_medium_type_count();
-}
-
template <bool include_unused>
std::vector<DataDir*> StorageEngine::get_stores() {
std::vector<DataDir*> stores;
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 3fe49c3246..db113f1d93 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -89,9 +89,6 @@ public:
template <bool include_unused = false>
std::vector<DataDir*> get_stores();
- // @brief 设置root_path是否可用
- void set_store_used_flag(const std::string& root_path, bool is_used);
-
// @brief 获取所有root_path信息
Status get_all_data_dir_info(std::vector<DataDirInfo>* data_dir_infos,
bool need_update);
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index e41b33cce4..555e149f5b 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1434,7 +1434,9 @@ void Tablet::build_tablet_report_info(TTabletInfo*
tablet_info,
// and perform state modification operations.
}
- if (has_version_cross && tablet_state() == TABLET_RUNNING) {
+ if ((has_version_cross || is_io_error_too_times()) && tablet_state() ==
TABLET_RUNNING) {
+ LOG(INFO) << "report " << full_name() << " as bad, version_cross=" <<
has_version_cross
+ << ", ioe times=" << get_io_error_times();
tablet_info->__set_used(false);
}
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index eaac50fbfc..7d079ebea9 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -401,6 +401,14 @@ public:
}
}
+ inline void increase_io_error_times() { ++_io_error_times; }
+
+ inline int64_t get_io_error_times() const { return _io_error_times; }
+
+ inline bool is_io_error_too_times() const {
+ return config::max_tablet_io_errors > 0 && _io_error_times >=
config::max_tablet_io_errors;
+ }
+
Status write_cooldown_meta(const std::shared_ptr<io::RemoteFileSystem>& fs,
UniqueId cooldown_meta_id, const
RowsetMetaSharedPtr& new_rs_meta,
const std::vector<RowsetMetaSharedPtr>&
to_deletes);
@@ -532,6 +540,8 @@ private:
DISALLOW_COPY_AND_ASSIGN(Tablet);
+ int64_t _io_error_times = 0;
+
public:
IntCounter* flush_bytes;
IntCounter* flush_finish_count;
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 3592b48ad5..cffa7fc10a 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -323,9 +323,11 @@ Status TabletMeta::create_from_file(const string&
file_path) {
FileHeader<TabletMetaPB> file_header;
FileHandler file_handler;
- if (file_handler.open(file_path, O_RDONLY) != Status::OK()) {
+ auto open_status = file_handler.open(file_path, O_RDONLY);
+
+ if (!open_status.ok()) {
LOG(WARNING) << "fail to open ordinal file. file=" << file_path;
- return Status::Error<IO_ERROR>();
+ return open_status;
}
// In file_header.unserialize(), it validates file length, signature,
checksum of protobuf.
@@ -399,9 +401,11 @@ Status TabletMeta::save(const string& file_path, const
TabletMetaPB& tablet_meta
FileHeader<TabletMetaPB> file_header;
FileHandler file_handler;
- if (!file_handler.open_with_mode(file_path, O_CREAT | O_WRONLY | O_TRUNC,
S_IRUSR | S_IWUSR)) {
+ auto open_status =
+ file_handler.open_with_mode(file_path, O_CREAT | O_WRONLY |
O_TRUNC, S_IRUSR | S_IWUSR);
+ if (!open_status.ok()) {
LOG(WARNING) << "fail to open header file. file='" << file_path;
- return Status::Error<IO_ERROR>();
+ return open_status;
}
try {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index bd07512a6b..a57ca2c543 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -259,6 +259,8 @@ public:
bool is_empty_column() { return data.empty(); }
+ bool empty() const { return rows() == 0; }
+
/** Updates SipHash of the Block, using update method of columns.
* Returns hash for block, that could be used to differentiate blocks
* with same structure, but different data.
diff --git a/be/src/vec/olap/vertical_block_reader.cpp
b/be/src/vec/olap/vertical_block_reader.cpp
index e33615393e..e2ffa84a41 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -159,6 +159,9 @@ Status VerticalBlockReader::init(const ReaderParams&
read_params) {
auto status = _init_collect_iter(read_params);
if (!status.ok()) {
+ if (status.is_io_error()) {
+ _tablet->increase_io_error_times();
+ }
return status;
}
diff --git a/be/src/vec/olap/vertical_block_reader.h
b/be/src/vec/olap/vertical_block_reader.h
index 1cd8161c4c..fcd93330a1 100644
--- a/be/src/vec/olap/vertical_block_reader.h
+++ b/be/src/vec/olap/vertical_block_reader.h
@@ -42,7 +42,11 @@ public:
Status init(const ReaderParams& read_params) override;
Status next_block_with_aggregation(Block* block, bool* eof) override {
- return (this->*_next_block_func)(block, eof);
+ auto res = (this->*_next_block_func)(block, eof);
+ if (UNLIKELY(res.is_io_error())) {
+ _tablet->increase_io_error_times();
+ }
+ return res;
}
uint64_t merged_rows() const override {
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index 9400e0e88c..b916bd0d6f 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -421,17 +421,17 @@ TEST_F(TestTablet, rowset_tree_update) {
// Hit a segment, but since we don't have real data, return an internal
error when loading the
// segment.
LOG(INFO) << tablet->lookup_row_key("101", &rowset_ids, &loc,
7).to_string();
- ASSERT_TRUE(tablet->lookup_row_key("101", &rowset_ids, &loc,
7).is<ROWSET_LOAD_FAILED>());
+ ASSERT_TRUE(tablet->lookup_row_key("101", &rowset_ids, &loc,
7).is<IO_ERROR>());
// Key not in range.
ASSERT_TRUE(tablet->lookup_row_key("201", &rowset_ids, &loc,
7).is<NOT_FOUND>());
- ASSERT_TRUE(tablet->lookup_row_key("300", &rowset_ids, &loc,
7).is<ROWSET_LOAD_FAILED>());
+ ASSERT_TRUE(tablet->lookup_row_key("300", &rowset_ids, &loc,
7).is<IO_ERROR>());
// Key not in range.
ASSERT_TRUE(tablet->lookup_row_key("499", &rowset_ids, &loc,
7).is<NOT_FOUND>());
// Version too low.
ASSERT_TRUE(tablet->lookup_row_key("500", &rowset_ids, &loc,
7).is<NOT_FOUND>());
// Hit a segment, but since we don't have real data, return an internal
error when loading the
// segment.
- ASSERT_TRUE(tablet->lookup_row_key("500", &rowset_ids, &loc,
8).is<ROWSET_LOAD_FAILED>());
+ ASSERT_TRUE(tablet->lookup_row_key("500", &rowset_ids, &loc,
8).is<IO_ERROR>());
}
} // namespace doris
diff --git a/docs/en/docs/admin-manual/maint-monitor/be-olap-error-code.md
b/docs/en/docs/admin-manual/maint-monitor/be-olap-error-code.md
index c471ece246..08d66ad09b 100644
--- a/docs/en/docs/admin-manual/maint-monitor/be-olap-error-code.md
+++ b/docs/en/docs/admin-manual/maint-monitor/be-olap-error-code.md
@@ -173,7 +173,6 @@ under the License.
| RowBlock error code | | |
| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock decompression error |
| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | Failed to obtain Block Entry |
-| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | Error reading Rowblock
information |
| Tablet metadata error | | |
| OLAP_ERR_HEADER_ADD_VERSION | -1400 | Tablet metadata increase version |
| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | Tablet metadata deletion version |
@@ -245,9 +244,7 @@ under the License.
| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset already exists |
| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset failed to create read object |
| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset is invalid |
-| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset load failed |
| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset read object initialization
failed |
-| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset read failure |
| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset invalid
transaction state |
| OLAP_ERR_ROWSET_RENAME_FILE_FAILED | -3116 | Rowset failed to rename file |
| OLAP_ERR_SEGCOMPACTION_INIT_READER | -3117 | Segment Compaction failed to
init reader |
diff --git a/docs/zh-CN/docs/admin-manual/maint-monitor/be-olap-error-code.md
b/docs/zh-CN/docs/admin-manual/maint-monitor/be-olap-error-code.md
index 7584c68485..2609295da2 100644
--- a/docs/zh-CN/docs/admin-manual/maint-monitor/be-olap-error-code.md
+++ b/docs/zh-CN/docs/admin-manual/maint-monitor/be-olap-error-code.md
@@ -182,7 +182,6 @@ under the License.
| RowBlock错误代码 | |
|
| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock解压缩错误
|
| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | 获取Block Entry失败
|
-| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | 读取Rowblock信息错误
|
| Tablet元数据错误 | |
|
| OLAP_ERR_HEADER_ADD_VERSION | -1400 | tablet元数据增加版本
|
| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | tablet元数据删除版本
|
@@ -254,9 +253,7 @@ under the License.
| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset已经存在
|
| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset创建读对象失败
|
| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset无效
|
-| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset加载失败
|
| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset读对象初始化失败
|
-| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset读失败
|
| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset无效的事务状态
|
| OLAP_ERR_ROWSET_RENAME_FILE_FAILED | -3116 | Rowset重命名文件失败
|
| OLAP_ERR_SEGCOMPACTION_INIT_READER | -3117 |
SegmentCompaction初始化Reader失败 |
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]