This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 9519406fc7 [improvement](predicate) Cache the dict code in
ComparisonPredicate (pick #17684 from master) (#17939)
9519406fc7 is described below
commit 9519406fc7858f77d2547fb51a42637e694fb43f
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Mar 21 09:09:31 2023 +0800
[improvement](predicate) Cache the dict code in ComparisonPredicate (pick
#17684 from master) (#17939)
---
be/src/olap/column_predicate.h | 5 ++
be/src/olap/comparison_predicate.h | 72 ++++++++++++++++------
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 16 ++++-
be/src/olap/rowset/segment_v2/segment_iterator.h | 2 +
4 files changed, 73 insertions(+), 22 deletions(-)
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index d1fc57d2a2..d6227fae79 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -171,6 +171,11 @@ public:
", opposite=" + (_opposite ? "true" : "false");
}
+ /// Some predicates need to be cloned for each segment.
+ virtual bool need_to_clone() const { return false; }
+
+ virtual void clone(ColumnPredicate** to) const { LOG(FATAL) << "clone not supported"; }
+
protected:
// Just prevent access not align memory address coredump
template <class T>
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index 051dd2d270..599bbd50ab 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -31,7 +31,15 @@ class ComparisonPredicateBase : public ColumnPredicate {
public:
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false)
- : ColumnPredicate(column_id, opposite), _value(value) {}
+ : ColumnPredicate(column_id, opposite),
+ _cached_code(_InvalidateCodeValue),
+ _value(value) {}
+
+ void clone(ColumnPredicate** to) const override {
+ *to = new ComparisonPredicateBase(_column_id, _value, _opposite);
+ }
+
+ bool need_to_clone() const override { return true; }
PredicateType type() const override { return PT; }
@@ -334,13 +342,20 @@ public:
auto* dict_column_ptr =
vectorized::check_and_get_column<vectorized::ColumnDictI32>(
nested_column);
- auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound(
- _value, _is_greater(), _is_eq())
- : dict_column_ptr->find_code(_value);
- auto* data_array = dict_column_ptr->get_data().data();
- _base_loop_vec<true, is_and>(size, flags, null_map.data(),
data_array,
- dict_code);
+ auto dict_code =
_find_code_from_dictionary_column(*dict_column_ptr);
+ do {
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2) {
+ memset(flags, 0, size);
+ break;
+ }
+ }
+ auto* data_array = dict_column_ptr->get_data().data();
+
+ _base_loop_vec<true, is_and>(size, flags,
null_map.data(), data_array,
+ dict_code);
+ } while (false);
} else {
LOG(FATAL) << "column_dictionary must use StringValue predicate.";
}
@@ -357,12 +372,18 @@ public:
if constexpr (std::is_same_v<T, StringValue>) {
auto* dict_column_ptr =
vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
- auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound(
- _value, _is_greater(), _is_eq())
- : dict_column_ptr->find_code(_value);
- auto* data_array = dict_column_ptr->get_data().data();
-
- _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, dict_code);
+ auto dict_code =
_find_code_from_dictionary_column(*dict_column_ptr);
+ do {
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2) {
+ memset(flags, 0, size);
+ break;
+ }
+ }
+ auto* data_array = dict_column_ptr->get_data().data();
+
+ _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, dict_code);
+ } while (false);
} else {
LOG(FATAL) << "column_dictionary must use StringValue predicate.";
}
@@ -537,9 +558,7 @@ private:
auto* dict_column_ptr =
vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
auto* data_array = dict_column_ptr->get_data().data();
- auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound(
- _value, _operator(1, 0), _operator(1, 1))
- : dict_column_ptr->find_code(_value);
+ auto dict_code =
_find_code_from_dictionary_column(*dict_column_ptr);
_base_loop_bit<is_nullable, is_and>(sel, size, flags,
null_map, data_array,
dict_code);
} else {
@@ -583,9 +602,13 @@ private:
auto* dict_column_ptr =
vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
auto* data_array = dict_column_ptr->get_data().data();
- auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound(
- _value, _is_greater(), _is_eq())
- : dict_column_ptr->find_code(_value);
+ auto dict_code =
_find_code_from_dictionary_column(*dict_column_ptr);
+
+ if constexpr (PT == PredicateType::EQ) {
+ if (dict_code == -2) {
+ return _opposite ? size : 0;
+ }
+ }
return _base_loop<is_nullable>(sel, size, null_map,
data_array, dict_code);
} else {
@@ -603,12 +626,23 @@ private:
}
}
+ __attribute__((flatten)) int32_t _find_code_from_dictionary_column(
+ const vectorized::ColumnDictI32& column) const {
+ if (UNLIKELY(_cached_code == _InvalidateCodeValue)) {
+ _cached_code = _is_range() ? column.find_code_by_bound(_value, _is_greater(), _is_eq())
+ : column.find_code(_value);
+ }
+ return _cached_code;
+ }
+
std::string _debug_string() const override {
std::string info =
"ComparisonPredicateBase(" + type_to_string(Type) + ", " +
type_to_string(PT) + ")";
return info;
}
+ static constexpr int32_t _InvalidateCodeValue = std::numeric_limits<int32_t>::max();
+ mutable int32_t _cached_code;
T _value;
};
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 72b6026e65..ea21e6a50b 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -151,7 +151,8 @@ SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment, const Schema&
_cur_rowid(0),
_lazy_materialization_read(false),
_inited(false),
- _estimate_row_size(true) {}
+ _estimate_row_size(true),
+ _pool(new ObjectPool) {}
SegmentIterator::~SegmentIterator() {
for (auto iter : _column_iterators) {
@@ -164,9 +165,18 @@ SegmentIterator::~SegmentIterator() {
Status SegmentIterator::init(const StorageReadOptions& opts) {
_opts = opts;
- if (!opts.column_predicates.empty()) {
- _col_predicates = opts.column_predicates;
+
+ for (auto& predicate : opts.column_predicates) {
+ if (predicate->need_to_clone()) {
+ ColumnPredicate* cloned;
+ predicate->clone(&cloned);
+ _pool->add(cloned);
+ _col_predicates.emplace_back(cloned);
+ } else {
+ _col_predicates.emplace_back(predicate);
+ }
}
+
// Read options will not change, so that just resize here
_block_rowids.resize(_opts.block_row_max);
return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index ad4fc13029..ce2d5d5269 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -241,6 +241,8 @@ private:
// used for compaction, record selectd rowids of current batch
uint16_t _selected_size;
vector<uint16_t> _sel_rowid_idx;
+
+ std::unique_ptr<ObjectPool> _pool;
};
} // namespace segment_v2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]