This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 57327e6236 [improvement]Separate input and output parameters in
ColumnPredicate (#10249)
57327e6236 is described below
commit 57327e6236e4ed5943f976dec54bf10710f445e1
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Jun 20 15:04:57 2022 +0800
[improvement]Separate input and output parameters in ColumnPredicate
(#10249)
```cpp
for (uint16_t i = 0; i < *size; ++i) {
// some code here
}
```
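Because `size` is a pointer, the value of `*size` is re-read for each test of
the loop condition, which also prevents possible vectorization. This commit
passes the input size by value and returns the new size instead.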
---
be/src/olap/block_column_predicate.cpp | 29 +++++-----
be/src/olap/block_column_predicate.h | 18 ++++---
be/src/olap/bloom_filter_predicate.h | 20 +++----
be/src/olap/column_predicate.h | 4 +-
be/src/olap/comparison_predicate.cpp | 62 +++++++++++-----------
be/src/olap/comparison_predicate.h | 3 +-
be/src/olap/in_list_predicate.h | 22 ++++----
be/src/olap/null_predicate.cpp | 9 ++--
be/src/olap/null_predicate.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 33 ++++++------
be/src/olap/rowset/segment_v2/segment_iterator.h | 4 +-
11 files changed, 107 insertions(+), 99 deletions(-)
diff --git a/be/src/olap/block_column_predicate.cpp
b/be/src/olap/block_column_predicate.cpp
index 5fce8beebe..fc35bd8de9 100644
--- a/be/src/olap/block_column_predicate.cpp
+++ b/be/src/olap/block_column_predicate.cpp
@@ -41,11 +41,11 @@ void SingleColumnBlockPredicate::evaluate_or(RowBlockV2*
block, uint16_t selecte
_predicate->evaluate_or(&column_block, block->selection_vector(),
selected_size, flags);
}
-void SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& block,
uint16_t* sel,
- uint16_t* selected_size) const {
+uint16_t SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns&
block, uint16_t* sel,
+ uint16_t selected_size) const {
auto column_id = _predicate->column_id();
auto& column = block[column_id];
- _predicate->evaluate(*column, sel, selected_size);
+ return _predicate->evaluate(*column, sel, selected_size);
}
void SingleColumnBlockPredicate::evaluate_and(vectorized::MutableColumns&
block, uint16_t* sel,
@@ -90,25 +90,25 @@ void OrBlockColumnPredicate::evaluate(RowBlockV2* block,
uint16_t* selected_size
}
}
-void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block,
uint16_t* sel,
- uint16_t* selected_size) const {
+uint16_t OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block,
uint16_t* sel,
+ uint16_t selected_size) const {
if (num_of_column_predicate() == 1) {
- _block_column_predicate_vec[0]->evaluate(block, sel, selected_size);
+ return _block_column_predicate_vec[0]->evaluate(block, sel,
selected_size);
} else {
- bool ret_flags[*selected_size];
- memset(ret_flags, false, *selected_size);
+ bool ret_flags[selected_size];
+ memset(ret_flags, false, selected_size);
for (int i = 0; i < num_of_column_predicate(); ++i) {
auto column_predicate = _block_column_predicate_vec[i];
- column_predicate->evaluate_or(block, sel, *selected_size,
ret_flags);
+ column_predicate->evaluate_or(block, sel, selected_size,
ret_flags);
}
uint16_t new_size = 0;
- for (int i = 0; i < *selected_size; ++i) {
+ for (int i = 0; i < selected_size; ++i) {
if (ret_flags[i]) {
sel[new_size++] = sel[i];
}
}
- *selected_size = new_size;
+ return new_size;
}
}
@@ -168,11 +168,12 @@ void AndBlockColumnPredicate::evaluate(RowBlockV2* block,
uint16_t* selected_siz
}
}
-void AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block,
uint16_t* sel,
- uint16_t* selected_size) const {
+uint16_t AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block,
uint16_t* sel,
+ uint16_t selected_size) const {
for (auto block_column_predicate : _block_column_predicate_vec) {
- block_column_predicate->evaluate(block, sel, selected_size);
+ selected_size = block_column_predicate->evaluate(block, sel,
selected_size);
}
+ return selected_size;
}
void AndBlockColumnPredicate::evaluate_and(RowBlockV2* block, uint16_t
selected_size,
diff --git a/be/src/olap/block_column_predicate.h
b/be/src/olap/block_column_predicate.h
index 4bd4d7baf8..219a92c3aa 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -43,8 +43,10 @@ public:
virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const =
0;
- virtual void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
- uint16_t* selected_size) const {};
+ virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+ uint16_t selected_size) const {
+ return selected_size;
+ }
virtual void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size, bool* flags) const {};
virtual void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel,
@@ -66,8 +68,8 @@ public:
column_id_set.insert(_predicate->column_id());
};
- void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
- uint16_t* selected_size) const override;
+ uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+ uint16_t selected_size) const override;
void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
bool* flags) const override;
void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
@@ -115,8 +117,8 @@ public:
void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
- void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
- uint16_t* selected_size) const override;
+ uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+ uint16_t selected_size) const override;
void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
bool* flags) const override;
void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
@@ -135,8 +137,8 @@ public:
// 2.Evaluate OR SEMANTICS in flags use 1 result to get proper select flags
void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags)
const override;
- void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
- uint16_t* selected_size) const override;
+ uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+ uint16_t selected_size) const override;
void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
bool* flags) const override;
void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
diff --git a/be/src/olap/bloom_filter_predicate.h
b/be/src/olap/bloom_filter_predicate.h
index eac480ccea..9e6901cbfb 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -65,7 +65,7 @@ public:
return Status::OK();
}
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)
const override;
+ uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t
size) const override;
private:
std::shared_ptr<IBloomFilterFuncBase> _filter;
@@ -111,12 +111,12 @@ void BloomFilterColumnPredicate<T>::evaluate(ColumnBlock*
block, uint16_t* sel,
}
template <PrimitiveType T>
-void BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column,
uint16_t* sel,
- uint16_t* size) const {
+uint16_t BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column,
uint16_t* sel,
+ uint16_t size) const {
uint16_t new_size = 0;
using FT = typename PredicatePrimitiveTypeTraits<T>::PredicateFieldType;
if (!_enable_pred) {
- return;
+ return size;
}
if (column.is_nullable()) {
auto* nullable_col =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -127,7 +127,7 @@ void
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
nullable_col->get_nested_column());
const_cast<vectorized::ColumnDictI32*>(dict_col)
->generate_hash_values_for_runtime_filter();
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size += (!null_map_data[idx]) &&
@@ -137,7 +137,7 @@ void
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
auto* pred_col =
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(
nullable_col->get_nested_column());
auto& pred_col_data = pred_col->get_data();
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
const auto* cell_value = reinterpret_cast<const
void*>(&(pred_col_data[idx]));
@@ -147,7 +147,7 @@ void
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
} else if (column.is_column_dictionary()) {
auto* dict_col =
vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
const_cast<vectorized::ColumnDictI32*>(dict_col)->generate_hash_values_for_runtime_filter();
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size +=
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
@@ -156,7 +156,7 @@ void
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
auto* pred_col =
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(column);
auto& pred_col_data = pred_col->get_data();
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
const auto* cell_value = reinterpret_cast<const
void*>(&(pred_col_data[idx]));
@@ -166,14 +166,14 @@ void
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
// If the pass rate is very high, for example > 50%, then the bloomfilter
is useless.
// Some bloomfilter is useless, for example ssb 4.3, it consumes a lot of
cpu but it is
// useless.
- _evaluated_rows += *size;
+ _evaluated_rows += size;
_passed_rows += new_size;
if (_evaluated_rows > config::bloom_filter_predicate_check_row_num) {
if (_passed_rows / (_evaluated_rows * 1.0) > 0.5) {
_enable_pred = false;
}
}
- *size = new_size;
+ return new_size;
}
class BloomFilterColumnPredicateFactory {
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 7de6d1b6eb..86580875e6 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -73,7 +73,9 @@ public:
// evaluate predicate on IColumn
// a short circuit eval way
- virtual void evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t* size) const {};
+ virtual uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const {
+ return size;
+ };
virtual void evaluate_and(vectorized::IColumn& column, uint16_t* sel,
uint16_t size,
bool* flags) const {};
virtual void evaluate_or(vectorized::IColumn& column, uint16_t* sel,
uint16_t size,
diff --git a/be/src/olap/comparison_predicate.cpp
b/be/src/olap/comparison_predicate.cpp
index 2223d1fe19..fc50c354fd 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -149,7 +149,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
// by this way, avoid redundant code
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)
\
template <class T>
\
- void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t* size) const { \
+ uint16_t CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const { \
uint16_t new_size = 0;
\
if (column.is_nullable()) {
\
auto* nullable_col =
\
@@ -166,7 +166,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto dict_code =
\
IS_RANGE ?
nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: nested_col_ptr->find_code(_value);
\
- for (uint16_t i = 0; i < *size; i++) {
\
+ for (uint16_t i = 0; i < size; i++) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = data_array[idx];
\
@@ -179,7 +179,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>( \
nested_col);
\
auto& data_array = nested_col_ptr->get_data();
\
- for (uint16_t i = 0; i < *size; i++) {
\
+ for (uint16_t i = 0; i < size; i++) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = reinterpret_cast<const
T&>(data_array[idx]); \
@@ -195,7 +195,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto& data_array = dict_col.get_data();
\
auto dict_code = IS_RANGE ?
dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: dict_col.find_code(_value);
\
- for (uint16_t i = 0; i < *size; ++i) {
\
+ for (uint16_t i = 0; i < size; ++i) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = data_array[idx];
\
@@ -206,7 +206,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
} else {
\
auto& pred_column_ref =
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
auto& data_array = pred_column_ref.get_data();
\
- for (uint16_t i = 0; i < *size; i++) {
\
+ for (uint16_t i = 0; i < size; i++) {
\
uint16_t idx = sel[i];
\
sel[new_size] = idx;
\
const auto& cell_value = reinterpret_cast<const
T&>(data_array[idx]); \
@@ -214,7 +214,7 @@
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
new_size += _opposite ? !ret : ret;
\
}
\
}
\
- *size = new_size;
\
+ return new_size;
\
}
COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
@@ -609,31 +609,31 @@
COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(LessEqualPredicate)
COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterPredicate)
COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterEqualPredicate)
-#define COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(CLASS)
\
- template void CLASS<int8_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<int16_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<int32_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<int64_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<int128_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<float>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<double>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<decimal12_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<StringValue>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<uint24_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<uint64_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
- uint16_t* size) const;
\
- template void CLASS<bool>::evaluate(vectorized::IColumn& column, uint16_t*
sel, \
- uint16_t* size) const;
+#define COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(CLASS)
\
+ template uint16_t CLASS<int8_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<int16_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<int32_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<int64_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<int128_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<float>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<double>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<decimal12_t>::evaluate(vectorized::IColumn&
column, uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<StringValue>::evaluate(vectorized::IColumn&
column, uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<uint24_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<uint64_t>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
\
+ template uint16_t CLASS<bool>::evaluate(vectorized::IColumn& column,
uint16_t* sel, \
+ uint16_t size) const;
COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(EqualPredicate)
COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(NotEqualPredicate)
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 54ddd11e50..40b93fb888 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -40,7 +40,8 @@ class VectorizedRowBatch;
virtual Status evaluate(const Schema& schema,
\
const std::vector<BitmapIndexIterator*>&
iterators, \
uint32_t num_rows, roaring::Roaring* roaring)
const override; \
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t*
size) const override; \
+ uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel,
\
+ uint16_t size) const override;
\
void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t
size, \
bool* flags) const override;
\
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t
size, \
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index c8574092e4..e39686abd4 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -201,7 +201,7 @@ public:
return Status::OK();
}
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)
const override {
+ uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t
size) const override {
if (column.is_nullable()) {
auto* nullable_col =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -211,15 +211,15 @@ public:
auto& nested_col = nullable_col->get_nested_column();
if (_opposite) {
- _base_evaluate<true, true>(&nested_col, &null_bitmap, sel,
size);
+ return _base_evaluate<true, true>(&nested_col, &null_bitmap,
sel, size);
} else {
- _base_evaluate<true, false>(&nested_col, &null_bitmap, sel,
size);
+ return _base_evaluate<true, false>(&nested_col, &null_bitmap,
sel, size);
}
} else {
if (_opposite) {
- _base_evaluate<false, true>(&column, nullptr, sel, size);
+ return _base_evaluate<false, true>(&column, nullptr, sel,
size);
} else {
- _base_evaluate<false, false>(&column, nullptr, sel, size);
+ return _base_evaluate<false, false>(&column, nullptr, sel,
size);
}
}
}
@@ -285,9 +285,9 @@ private:
}
template <bool is_nullable, bool is_opposite>
- void _base_evaluate(const vectorized::IColumn* column,
- const vectorized::PaddedPODArray<vectorized::UInt8>*
null_map,
- uint16_t* sel, uint16_t* size) const {
+ uint16_t _base_evaluate(const vectorized::IColumn* column,
+ const
vectorized::PaddedPODArray<vectorized::UInt8>* null_map,
+ uint16_t* sel, uint16_t size) const {
uint16_t new_size = 0;
if (column->is_column_dictionary()) {
@@ -297,7 +297,7 @@ private:
auto& data_array = nested_col_ptr->get_data();
nested_col_ptr->find_codes(_values, _value_in_dict_flags);
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
if constexpr (is_nullable) {
if ((*null_map)[idx]) {
@@ -326,7 +326,7 @@ private:
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
auto& data_array = nested_col_ptr->get_data();
- for (uint16_t i = 0; i < *size; i++) {
+ for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
if constexpr (is_nullable) {
if ((*null_map)[idx]) {
@@ -351,7 +351,7 @@ private:
}
}
- *size = new_size;
+ return new_size;
}
phmap::flat_hash_set<T> _values;
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index 160e49ebe8..3b94cf6b8f 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -125,19 +125,20 @@ Status NullPredicate::evaluate(const Schema& schema,
return Status::OK();
}
-void NullPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t* size) const {
+uint16_t NullPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const {
uint16_t new_size = 0;
if (auto* nullable = check_and_get_column<ColumnNullable>(column)) {
auto& null_map = nullable->get_null_map_data();
- for (uint16_t i = 0; i < *size; ++i) {
+ for (uint16_t i = 0; i < size; ++i) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size += (null_map[idx] == _is_null);
}
- *size = new_size;
+ return new_size;
} else {
- if (_is_null) *size = 0;
+ if (_is_null) return 0;
}
+ return size;
}
void NullPredicate::evaluate_or(IColumn& column, uint16_t* sel, uint16_t size,
bool* flags) const {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 4632b6055f..294ce339b5 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -44,7 +44,7 @@ public:
virtual Status evaluate(const Schema& schema, const
vector<BitmapIndexIterator*>& iterators,
uint32_t num_rows, roaring::Roaring* roaring)
const override;
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)
const override;
+ uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t
size) const override;
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
bool* flags) const override;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 6d3bfcd5ec..b9d511b05a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -856,14 +856,14 @@ Status SegmentIterator::_read_columns_by_index(uint32_t
nrows_read_limit, uint32
return Status::OK();
}
-void SegmentIterator::_evaluate_vectorization_predicate(uint16_t*
sel_rowid_idx,
- uint16_t&
selected_size) {
+uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t*
sel_rowid_idx,
+ uint16_t
selected_size) {
SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
if (!_is_need_vec_eval) {
for (uint32_t i = 0; i < selected_size; ++i) {
sel_rowid_idx[i] = i;
}
- return;
+ return selected_size;
}
uint16_t original_size = selected_size;
@@ -894,17 +894,17 @@ void
SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
}
_opts.stats->rows_vec_cond_filtered += original_size - new_size;
- selected_size = new_size;
+ return new_size;
}
-void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t*
vec_sel_rowid_idx,
- uint16_t*
selected_size_ptr) {
+uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t*
vec_sel_rowid_idx,
+ uint16_t
selected_size) {
SCOPED_RAW_TIMER(&_opts.stats->short_cond_ns);
if (!_is_need_short_eval) {
- return;
+ return selected_size;
}
- uint16_t original_size = *selected_size_ptr;
+ uint16_t original_size = selected_size;
for (auto predicate : _short_cir_eval_predicate) {
auto column_id = predicate->column_id();
auto& short_cir_column = _current_return_columns[column_id];
@@ -914,15 +914,16 @@ void
SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
predicate->type() == PredicateType::GT || predicate->type() ==
PredicateType::GE) {
col_ptr->convert_dict_codes_if_necessary();
}
- predicate->evaluate(*short_cir_column, vec_sel_rowid_idx,
selected_size_ptr);
+ selected_size = predicate->evaluate(*short_cir_column,
vec_sel_rowid_idx, selected_size);
}
- _opts.stats->rows_vec_cond_filtered += original_size - *selected_size_ptr;
+ _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
// evaluate delete condition
- original_size = *selected_size_ptr;
- _opts.delete_condition_predicates->evaluate(_current_return_columns,
vec_sel_rowid_idx,
- selected_size_ptr);
- _opts.stats->rows_vec_del_cond_filtered += original_size -
*selected_size_ptr;
+ original_size = selected_size;
+ selected_size =
_opts.delete_condition_predicates->evaluate(_current_return_columns,
+
vec_sel_rowid_idx, selected_size);
+ _opts.stats->rows_vec_del_cond_filtered += original_size - selected_size;
+ return selected_size;
}
void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>&
read_column_ids,
@@ -1006,13 +1007,13 @@ Status SegmentIterator::next_batch(vectorized::Block*
block) {
uint16_t sel_rowid_idx[selected_size];
// step 1: evaluate vectorization predicate
- _evaluate_vectorization_predicate(sel_rowid_idx, selected_size);
+ selected_size = _evaluate_vectorization_predicate(sel_rowid_idx,
selected_size);
// step 2: evaluate short ciruit predicate
// todo(wb) research whether need to read short predicate after
vectorization evaluation
// to reduce cost of read short circuit columns.
// In SSB test, it make no difference; So need more scenarios
to test
- _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
+ selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx,
selected_size);
if (!_lazy_materialization_read) {
Status ret = _output_column_by_sel_idx(block,
_first_read_column_ids, sel_rowid_idx,
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 0ce9975456..04c68699a0 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -99,8 +99,8 @@ private:
bool set_block_rowid);
void _init_current_block(vectorized::Block* block,
std::vector<vectorized::MutableColumnPtr>&
non_pred_vector);
- void _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t&
selected_size);
- void _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t*
selected_size);
+ uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
uint16_t selected_size);
+ uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx,
uint16_t selected_size);
void _output_non_pred_columns(vectorized::Block* block);
void _read_columns_by_rowids(std::vector<ColumnId>& read_column_ids,
std::vector<rowid_t>& rowid_vector, uint16_t*
sel_rowid_idx,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]