HappenLee commented on code in PR #11985:
URL: https://github.com/apache/doris/pull/11985#discussion_r954572208
##########
be/src/olap/in_list_predicate.h:
##########
@@ -325,6 +386,8 @@ class InListPredicateBase : public ColumnPredicate {
phmap::flat_hash_set<T> _values;
mutable std::vector<vectorized::UInt8> _value_in_dict_flags;
+ T _min_value;
Review Comment:
Seems we should initialize this value in the constructor so that it takes effect?
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -202,22 +203,30 @@ void ColumnReader::_parse_zone_map(const ZoneMapPB&
zone_map, WrapperField* min_
}
}
-bool ColumnReader::_zone_map_match_condition(const ZoneMapPB& zone_map,
- WrapperField* min_value_container,
- WrapperField* max_value_container,
- CondColumn* cond) const {
+bool ColumnReader::_zone_map_match_condition(
+ const ZoneMapPB& zone_map, WrapperField* min_value_container,
+ WrapperField* max_value_container,
+ const std::vector<ColumnPredicate*>& col_predicates) const {
if (!zone_map.has_not_null() && !zone_map.has_null()) {
return false; // no data in this zone
}
- if (cond == nullptr || zone_map.pass_all()) {
+ if (col_predicates.empty() || zone_map.pass_all()) {
return true;
}
- return cond->eval({min_value_container, max_value_container});
+ AndBlockColumnPredicate condition_predicates;
+
+ for (auto& cond : col_predicates) {
+ auto single_column_block_predicate = new
SingleColumnBlockPredicate(cond);
Review Comment:
Rethink whether we need to call the `new` operator each time? Maybe this work
can be done just once.
##########
be/src/olap/in_list_predicate.h:
##########
@@ -325,6 +386,8 @@ class InListPredicateBase : public ColumnPredicate {
phmap::flat_hash_set<T> _values;
mutable std::vector<vectorized::UInt8> _value_in_dict_flags;
+ T _min_value;
Review Comment:
Seems we should initialize this value in the constructor so that it takes effect?
##########
be/src/olap/comparison_predicate.h:
##########
@@ -199,6 +201,101 @@ class ComparisonPredicateBase : public ColumnPredicate {
_evaluate_bit<true>(column, sel, size, flags);
}
+ bool evaluate_and(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
+ if (statistic.first == nullptr || statistic.second == nullptr) {
+ return true;
+ }
+ if (statistic.first->is_null()) {
+ return true;
+ }
+ if constexpr (PT == PredicateType::EQ) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_min_uint32_value = 0;
+ memcpy((char*)(&tmp_min_uint32_value),
statistic.first->cell_ptr(),
+ sizeof(uint24_t));
+ T tmp_max_uint32_value = 0;
+ memcpy((char*)(&tmp_max_uint32_value),
statistic.second->cell_ptr(),
+ sizeof(uint24_t));
+ return tmp_min_uint32_value <= _value && tmp_max_uint32_value
>= _value;
+ } else if constexpr (std::is_same_v<T, StringValue>) {
+ auto min = reinterpret_cast<const
Slice*>(statistic.first->cell_ptr());
+ auto max = reinterpret_cast<const
Slice*>(statistic.second->cell_ptr());
+ return StringValue(min->data, min->size) <= _value &&
+ StringValue(max->data, max->size) >= _value;
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) <= _value &&
+ *reinterpret_cast<const
T*>(statistic.second->cell_ptr()) >= _value;
+ }
+ } else if constexpr (PT == PredicateType::NE) {
+ return true;
+ } else if constexpr (PT == PredicateType::LT) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_uint32_value = 0;
+ memcpy((char*)(&tmp_uint32_value),
statistic.first->cell_ptr(), sizeof(uint24_t));
+ return tmp_uint32_value < _value;
+ } else if constexpr (std::is_same_v<T, StringValue>) {
+ auto min = reinterpret_cast<const
Slice*>(statistic.first->cell_ptr());
+ return StringValue(min->data, min->size) < _value;
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) < _value;
+ }
+ } else if constexpr (PT == PredicateType::LE) {
+ if constexpr (Type == TYPE_DATE) {
Review Comment:
Use a function or a `#define` macro to simplify the code for ```LE, GT, GE, LT```.
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -302,22 +303,22 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
}
Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges*
condition_row_ranges) {
- std::set<int32_t> uids;
- if (_opts.conditions != nullptr) {
- for (auto& column_condition : _opts.conditions->columns()) {
- uids.insert(column_condition.first);
- }
+ std::set<int32_t> cids;
+ for (auto& entry : _opts.col_id_to_predicates) {
+ cids.insert(entry.first);
}
// first filter data by bloom filter index
// bloom filter index only use CondColumn
RowRanges bf_row_ranges = RowRanges::create_single(num_rows());
- for (auto& uid : uids) {
+ for (auto& cid : cids) {
// get row ranges by bf index of this column,
RowRanges column_bf_row_ranges = RowRanges::create_single(num_rows());
- CondColumn* column_cond = _opts.conditions->get_column(uid);
- RETURN_IF_ERROR(_column_iterators[uid]->get_row_ranges_by_bloom_filter(
- column_cond, &column_bf_row_ranges));
+ if (_opts.col_id_to_predicates.count(cid) < 1) {
Review Comment:
In which case would the count be lower than 1?
##########
be/src/olap/comparison_predicate.h:
##########
@@ -199,6 +201,101 @@ class ComparisonPredicateBase : public ColumnPredicate {
_evaluate_bit<true>(column, sel, size, flags);
}
+ bool evaluate_and(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
+ if (statistic.first == nullptr || statistic.second == nullptr) {
+ return true;
+ }
+ if (statistic.first->is_null()) {
+ return true;
+ }
+ if constexpr (PT == PredicateType::EQ) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_min_uint32_value = 0;
+ memcpy((char*)(&tmp_min_uint32_value),
statistic.first->cell_ptr(),
+ sizeof(uint24_t));
+ T tmp_max_uint32_value = 0;
+ memcpy((char*)(&tmp_max_uint32_value),
statistic.second->cell_ptr(),
+ sizeof(uint24_t));
+ return tmp_min_uint32_value <= _value && tmp_max_uint32_value
>= _value;
+ } else if constexpr (std::is_same_v<T, StringValue>) {
+ auto min = reinterpret_cast<const
Slice*>(statistic.first->cell_ptr());
+ auto max = reinterpret_cast<const
Slice*>(statistic.second->cell_ptr());
+ return StringValue(min->data, min->size) <= _value &&
+ StringValue(max->data, max->size) >= _value;
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) <= _value &&
+ *reinterpret_cast<const
T*>(statistic.second->cell_ptr()) >= _value;
+ }
+ } else if constexpr (PT == PredicateType::NE) {
+ return true;
Review Comment:
if `a != 10` and first == second == 10, we can filter the page
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]