morningman closed pull request #477: Fix get_tablet_stat data race and
base_compaction deletion check bug
URL: https://github.com/apache/incubator-doris/pull/477
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a pull request from a fork
once it has been merged, the diff is reproduced below for the sake of
provenance:
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index 3128cb6f..52332d8c 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -142,6 +142,13 @@ OLAPStatus BaseCompaction::run() {
return res;
}
+ if (_validate_delete_file_action() != OLAP_SUCCESS) {
+ LOG(WARNING) << "failed to do base compaction. delete action has
error.";
+ _garbage_collection();
+ return OLAP_ERR_BE_ERROR_DELETE_ACTION;
+ }
+
+
VLOG(3) << "elapsed time of doing base compaction:" <<
stage_watch.get_elapse_time_us();
// 4. make new versions visable.
@@ -157,24 +164,6 @@ OLAPStatus BaseCompaction::run() {
}
_delete_old_files(&unused_olap_indices);
- // validate that delete action is right
- // if error happened, sleep 1 hour. Report a fatal log every 1 minute
- if (_validate_delete_file_action() != OLAP_SUCCESS) {
- int sleep_count = 0;
- while (true) {
- if (sleep_count >= 60) {
- break;
- }
-
- ++sleep_count;
- LOG(FATAL) << "base compaction's delete action has error.sleep 1
minute...";
- sleep(60);
- }
-
- _garbage_collection();
- return OLAP_ERR_BE_ERROR_DELETE_ACTION;
- }
-
_release_base_compaction_lock();
return OLAP_SUCCESS;
diff --git a/be/src/olap/olap_engine.cpp b/be/src/olap/olap_engine.cpp
index 45f3a1bb..e4057c8a 100644
--- a/be/src/olap/olap_engine.cpp
+++ b/be/src/olap/olap_engine.cpp
@@ -1724,21 +1724,23 @@ void OLAPEngine::get_tablet_stat(TTabletStatResult&
result) {
// get current time
int64_t current_time = UnixMillis();
- _tablet_map_lock.rdlock();
- // update cache if too old
- if (current_time - _tablet_stat_cache_update_time_ms >
- config::tablet_stat_cache_update_interval_second * 1000) {
- VLOG(3) << "update tablet stat.";
- _build_tablet_stat();
+ {
+ std::lock_guard<std::mutex> l(_tablet_stat_mutex);
+ // update cache if too old
+ if (current_time - _tablet_stat_cache_update_time_ms >
+ config::tablet_stat_cache_update_interval_second * 1000) {
+ VLOG(3) << "update tablet stat.";
+ _build_tablet_stat();
+ }
}
result.__set_tablets_stats(_tablet_stat_cache);
-
- _tablet_map_lock.unlock();
}
void OLAPEngine::_build_tablet_stat() {
_tablet_stat_cache.clear();
+
+ _tablet_map_lock.rdlock();
for (const auto& item : _tablet_map) {
if (item.second.table_arr.size() == 0) {
continue;
@@ -1762,6 +1764,7 @@ void OLAPEngine::_build_tablet_stat() {
_tablet_stat_cache.emplace(item.first, stat);
}
+ _tablet_map_lock.unlock();
_tablet_stat_cache_update_time_ms = UnixMillis();
}
diff --git a/be/src/olap/olap_engine.h b/be/src/olap/olap_engine.h
index 9dc333c3..cbaf87a3 100644
--- a/be/src/olap/olap_engine.h
+++ b/be/src/olap/olap_engine.h
@@ -556,6 +556,7 @@ class OLAPEngine {
// cache to save tablets' statistics, such as data size and row
// TODO(cmy): for now, this is a naive implementation
std::map<int64_t, TTabletStat> _tablet_stat_cache;
+ std::mutex _tablet_stat_mutex;
// last update time of tablet stat cache
int64_t _tablet_stat_cache_update_time_ms;
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]