This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 65d45daf8a0 [Bug](coredump) fix regression test coredump in multi
thread access map (#31664)
65d45daf8a0 is described below
commit 65d45daf8a04ce97897fec3714d8761dd9576ae8
Author: HappenLee <[email protected]>
AuthorDate: Sat Mar 2 23:38:57 2024 +0800
[Bug](coredump) fix regression test coredump in multi thread access map
(#31664)
---
be/src/olap/comparison_predicate.h | 44 ++++++++++++++++++++++----------------
be/src/olap/data_dir.cpp | 2 +-
be/src/olap/olap_common.h | 28 +++++++++++++-----------
be/src/olap/snapshot_manager.cpp | 2 +-
be/src/olap/storage_engine.h | 4 ++--
be/src/olap/tablet.cpp | 3 +--
6 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 826b1414b2a..3673b89a1d0 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -578,24 +578,26 @@ private:
__attribute__((flatten)) int32_t _find_code_from_dictionary_column(
const vectorized::ColumnDictI32& column) const {
- if
(!_segment_id_to_cached_code.contains(column.get_rowset_segment_id())) {
- int32_t code = _is_range() ? column.find_code_by_bound(_value,
_is_greater(), _is_eq())
- : column.find_code(_value);
-
- // Sometimes the dict is not initialized when run comparison
predicate here, for example,
- // the full page is null, then the reader will skip read, so that
the dictionary is not
- // inited. The cached code is wrong during this case, because the
following page maybe not
- // null, and the dict should have items in the future.
- //
- // Cached code may have problems, so that add a config here, if
not opened, then
- // we will return the code and not cache it.
- if (column.is_dict_empty() ||
!config::enable_low_cardinality_cache_code) {
- return code;
- }
- // If the dict is not empty, then the dict is inited and we could
cache the value.
- _segment_id_to_cached_code[column.get_rowset_segment_id()] = code;
+ int32_t code = 0;
+ if (_segment_id_to_cached_code.if_contains(
+ column.get_rowset_segment_id(),
+ [&code](const auto& pair) { code = pair.second; })) {
+ return code;
+ }
+ code = _is_range() ? column.find_code_by_bound(_value, _is_greater(),
_is_eq())
+ : column.find_code(_value);
+ // Sometimes the dict is not initialized when run comparison predicate
here, for example,
+ // the full page is null, then the reader will skip read, so that the
dictionary is not
+ // inited. The cached code is wrong during this case, because the
following page maybe not
+ // null, and the dict should have items in the future.
+ //
+ // Cached code may have problems, so that add a config here, if not
opened, then
+ // we will return the code and not cache it.
+ if (!column.is_dict_empty() &&
config::enable_low_cardinality_cache_code) {
+ _segment_id_to_cached_code.emplace(std::pair
{column.get_rowset_segment_id(), code});
}
- return _segment_id_to_cached_code[column.get_rowset_segment_id()];
+
+ return code;
}
std::string _debug_string() const override {
@@ -604,7 +606,13 @@ private:
return info;
}
- mutable std::map<std::pair<RowsetId, uint32_t>, int32_t>
_segment_id_to_cached_code;
+ mutable phmap::parallel_flat_hash_map<
+ std::pair<RowsetId, uint32_t>, int32_t,
+ phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
+ phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
+ std::allocator<std::pair<const std::pair<RowsetId, uint32_t>,
int32_t>>, 4,
+ std::shared_mutex>
+ _segment_id_to_cached_code;
T _value;
};
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index c770e096eed..7cc57b9fee3 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -799,7 +799,7 @@ void DataDir::_perform_path_gc_by_rowset(const
std::vector<std::string>& tablet_
};
// rowset_id -> is_garbage
- std::unordered_map<RowsetId, bool, HashOfRowsetId> checked_rowsets;
+ std::unordered_map<RowsetId, bool> checked_rowsets;
for (auto&& [rowset_id, filename] : rowsets_not_pending) {
if (auto it = checked_rowsets.find(rowset_id); it !=
checked_rowsets.end()) {
if (it->second) { // Is checked garbage rowset
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 42bad24dfed..c08705861df 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -458,18 +458,7 @@ struct RowsetId {
}
};
-// used for hash-struct of hash_map<RowsetId, Rowset*>.
-struct HashOfRowsetId {
- size_t operator()(const RowsetId& rowset_id) const {
- size_t seed = 0;
- seed = HashUtil::hash64(&rowset_id.hi, sizeof(rowset_id.hi), seed);
- seed = HashUtil::hash64(&rowset_id.mi, sizeof(rowset_id.mi), seed);
- seed = HashUtil::hash64(&rowset_id.lo, sizeof(rowset_id.lo), seed);
- return seed;
- }
-};
-
-using RowsetIdUnorderedSet = std::unordered_set<RowsetId, HashOfRowsetId>;
+using RowsetIdUnorderedSet = std::unordered_set<RowsetId>;
// Extract rowset id from filename, return uninitialized rowset id if filename
is invalid
inline RowsetId extract_rowset_id(std::string_view filename) {
@@ -517,3 +506,18 @@ struct RidAndPos {
using PartialUpdateReadPlan = std::map<RowsetId, std::map<uint32_t,
std::vector<RidAndPos>>>;
} // namespace doris
+
+// This intended to be a "good" hash function. It may change from time to
time.
+template <>
+struct std::hash<doris::RowsetId> {
+ size_t operator()(const doris::RowsetId& rowset_id) const {
+ size_t seed = 0;
+ seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.hi,
sizeof(rowset_id.hi),
+ seed);
+ seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.mi,
sizeof(rowset_id.mi),
+ seed);
+ seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.lo,
sizeof(rowset_id.lo),
+ seed);
+ return seed;
+ }
+};
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 7cfcdcfba6f..e1127fa1610 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -164,7 +164,7 @@ Result<std::vector<PendingRowsetGuard>>
SnapshotManager::convert_rowset_ids(
tablet_schema->init_from_pb(new_tablet_meta_pb.schema());
std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
- std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
+ std::unordered_map<RowsetId, RowsetId> rowset_id_mapping;
guards.reserve(cloned_tablet_meta_pb.rs_metas_size() +
cloned_tablet_meta_pb.stale_rs_metas_size());
for (auto&& visible_rowset : cloned_tablet_meta_pb.rs_metas()) {
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 153c2273305..e1a4c098c48 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -357,13 +357,13 @@ private:
std::atomic_bool _stopped {false};
std::mutex _gc_mutex;
- std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId>
_unused_rowsets;
+ std::unordered_map<RowsetId, RowsetSharedPtr> _unused_rowsets;
PendingRowsetSet _pending_local_rowsets;
PendingRowsetSet _pending_remote_rowsets;
// Hold reference of quering rowsets
std::mutex _quering_rowsets_mutex;
- std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId>
_querying_rowsets;
+ std::unordered_map<RowsetId, RowsetSharedPtr> _querying_rowsets;
// Count the memory consumption of segment compaction tasks.
std::shared_ptr<MemTracker> _segcompaction_mem_tracker;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 72b7b2791df..8a761406ca6 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3438,8 +3438,7 @@ Status Tablet::check_rowid_conversion(
}
std::vector<segment_v2::SegmentSharedPtr> dst_segments;
RETURN_IF_ERROR(_load_rowset_segments(dst_rowset, &dst_segments));
- std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>,
HashOfRowsetId>
- input_rowsets_segment;
+ std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>>
input_rowsets_segment;
VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " <<
dst_segments.size();
for (auto [src_rowset, locations] : location_map) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]