This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 43915936b6 [refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631)
43915936b6 is described below
commit 43915936b6857f8becc3ab06476abbe2ec4ae6db
Author: minghong <[email protected]>
AuthorDate: Fri Jul 8 14:47:37 2022 +0800
[refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631)
---
be/src/olap/column_predicate.h | 5 +-
be/src/olap/comparison_predicate.h | 54 ++++++++++++++++------
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 +++++------
be/src/olap/rowset/segment_v2/segment_iterator.h | 25 +++++++++-
4 files changed, 80 insertions(+), 32 deletions(-)
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index be7a1bdc59..b2480fe672 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -92,7 +92,10 @@ public:
virtual void evaluate_vec(const vectorized::IColumn& column, uint16_t
size, bool* flags) const {
DCHECK(false) << "should not reach here";
}
-
+ virtual void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
+ bool* flags) const {
+ DCHECK(false) << "should not reach here";
+ }
uint32_t column_id() const { return _column_id; }
protected:
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index f0401aa4cc..8d1c0ff604 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -160,8 +160,9 @@ public:
_evaluate_bit<false>(column, sel, size, flags);
}
- void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
- bool* flags) const override {
+ template <bool is_and>
+ __attribute__((flatten)) void _evaluate_vec_internal(const
vectorized::IColumn& column,
+ uint16_t size, bool*
flags) const {
if (column.is_nullable()) {
auto* nullable_column_ptr =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -180,7 +181,8 @@ public:
:
dict_column_ptr->find_code(_value);
auto* data_array = dict_column_ptr->get_data().data();
- _base_loop_vec<true>(size, flags, null_map.data(),
data_array, dict_code);
+ _base_loop_vec<true, is_and>(size, flags, null_map.data(),
data_array,
+ dict_code);
} else {
LOG(FATAL) << "column_dictionary must use StringValue
predicate.";
}
@@ -190,7 +192,7 @@ public:
.get_data()
.data();
- _base_loop_vec<true>(size, flags, null_map.data(), data_array,
_value_real);
+ _base_loop_vec<true, is_and>(size, flags, null_map.data(),
data_array, _value_real);
}
} else {
if (column.is_column_dictionary()) {
@@ -202,7 +204,7 @@ public:
:
dict_column_ptr->find_code(_value);
auto* data_array = dict_column_ptr->get_data().data();
- _base_loop_vec<false>(size, flags, nullptr, data_array,
dict_code);
+ _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, dict_code);
} else {
LOG(FATAL) << "column_dictionary must use StringValue
predicate.";
}
@@ -213,7 +215,7 @@ public:
->get_data()
.data();
- _base_loop_vec<false>(size, flags, nullptr, data_array,
_value_real);
+ _base_loop_vec<false, is_and>(size, flags, nullptr,
data_array, _value_real);
}
}
@@ -224,6 +226,16 @@ public:
}
}
+ void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
+ bool* flags) const override {
+ _evaluate_vec_internal<false>(column, size, flags);
+ }
+
+ void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
+ bool* flags) const override {
+ _evaluate_vec_internal<true>(column, size, flags);
+ }
+
private:
using TReal = std::conditional_t<std::is_same_v<T, uint24_t>, uint32_t, T>;
@@ -313,14 +325,28 @@ private:
}
}
- template <bool is_nullable, typename TArray, typename TValue>
- void _base_loop_vec(uint16_t size, bool* __restrict flags, const uint8_t*
__restrict null_map,
- const TArray* __restrict data_array, const TValue&
value) const {
- for (uint16_t i = 0; i < size; i++) {
- if constexpr (is_nullable) {
- flags[i] = !null_map[i] && _operator(data_array[i], value);
- } else {
- flags[i] = _operator(data_array[i], value);
+ template <bool is_nullable, bool is_and, typename TArray, typename TValue>
+ __attribute__((flatten)) void _base_loop_vec(uint16_t size, bool*
__restrict bflags,
+ const uint8_t* __restrict
null_map,
+ const TArray* __restrict
data_array,
+ const TValue& value) const {
+ //uint8_t helps compiler to generate vectorized code
+ uint8_t* flags = reinterpret_cast<uint8_t*>(bflags);
+ if constexpr (is_and) {
+ for (uint16_t i = 0; i < size; i++) {
+ if constexpr (is_nullable) {
+ flags[i] &= (uint8_t)(!null_map[i] &&
_operator(data_array[i], value));
+ } else {
+ flags[i] &= (uint8_t)_operator(data_array[i], value);
+ }
+ }
+ } else {
+ for (uint16_t i = 0; i < size; i++) {
+ if constexpr (is_nullable) {
+ flags[i] = !null_map[i] && _operator(data_array[i], value);
+ } else {
+ flags[i] = _operator(data_array[i], value);
+ }
}
}
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index d1a70933c9..0e81609005 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -661,11 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
// Step1: check pred using short eval or vec eval
if (_can_evaluated_by_vectorized(predicate)) {
vec_pred_col_id_set.insert(predicate->column_id());
- if (_pre_eval_block_predicate == nullptr) {
- _pre_eval_block_predicate.reset(new
AndBlockColumnPredicate());
- }
- _pre_eval_block_predicate->add_column_predicate(
- new SingleColumnBlockPredicate(predicate));
+ _pre_eval_block_predicate.push_back(predicate);
} else {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
@@ -879,8 +875,16 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
}
uint16_t original_size = selected_size;
- bool ret_flags[selected_size];
- _pre_eval_block_predicate->evaluate_vec(_current_return_columns,
selected_size, ret_flags);
+ bool ret_flags[original_size];
+ DCHECK(_pre_eval_block_predicate.size() > 0);
+ auto column_id = _pre_eval_block_predicate[0]->column_id();
+ auto& column = _current_return_columns[column_id];
+ _pre_eval_block_predicate[0]->evaluate_vec(*column, original_size,
ret_flags);
+ for (int i = 1; i < _pre_eval_block_predicate.size(); i++) {
+ auto column_id2 = _pre_eval_block_predicate[i]->column_id();
+ auto& column2 = _current_return_columns[column_id2];
+ _pre_eval_block_predicate[i]->evaluate_and_vec(*column2,
original_size, ret_flags);
+ }
uint16_t new_size = 0;
@@ -928,15 +932,6 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro
for (auto predicate : _short_cir_eval_predicate) {
auto column_id = predicate->column_id();
auto& short_cir_column = _current_return_columns[column_id];
- auto* col_ptr = short_cir_column.get();
-
- // Dictionary column should do something to initial.
- if (PredicateTypeTraits::is_range(predicate->type())) {
- col_ptr->convert_dict_codes_if_necessary();
- } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
- col_ptr->generate_hash_values_for_runtime_filter();
- }
-
selected_size = predicate->evaluate(*short_cir_column,
vec_sel_rowid_idx, selected_size);
}
_opts.stats->rows_vec_cond_filtered += original_size - selected_size;
@@ -1024,6 +1019,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
if (!_is_need_vec_eval && !_is_need_short_eval) {
_output_non_pred_columns(block);
} else {
+ _convert_dict_code_for_predicate_if_necessary();
uint16_t selected_size = nrows_read;
uint16_t sel_rowid_idx[selected_size];
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index b56baf2888..195307d9f5 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -122,6 +122,29 @@ private:
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
+ // Dictionary column should do something to initial.
+ void _convert_dict_code_for_predicate_if_necessary() {
+ for (auto predicate : _short_cir_eval_predicate) {
+ auto& column = _current_return_columns[predicate->column_id()];
+ auto* col_ptr = column.get();
+ if (PredicateTypeTraits::is_range(predicate->type())) {
+ col_ptr->convert_dict_codes_if_necessary();
+ } else if
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+ col_ptr->generate_hash_values_for_runtime_filter();
+ }
+ }
+
+ for (auto predicate : _pre_eval_block_predicate) {
+ auto& column = _current_return_columns[predicate->column_id()];
+ auto* col_ptr = column.get();
+ if (PredicateTypeTraits::is_range(predicate->type())) {
+ col_ptr->convert_dict_codes_if_necessary();
+ } else if
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+ col_ptr->generate_hash_values_for_runtime_filter();
+ }
+ }
+ }
+
private:
class BitmapRangeIterator;
@@ -159,7 +182,7 @@ private:
_short_cir_pred_column_ids; // keep columnId of columns for short
circuit predicate evaluation
std::vector<bool> _is_pred_column; // columns hold by segmentIter
vectorized::MutableColumns _current_return_columns;
- std::unique_ptr<AndBlockColumnPredicate> _pre_eval_block_predicate;
+ std::vector<ColumnPredicate*> _pre_eval_block_predicate;
std::vector<ColumnPredicate*> _short_cir_eval_predicate;
// when lazy materialization is enable, segmentIter need to read data at
least twice
// first, read predicate columns by various index
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]