This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 57327e6236 [improvement]Separate input and output parameters in 
ColumnPredicate (#10249)
57327e6236 is described below

commit 57327e6236e4ed5943f976dec54bf10710f445e1
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Jun 20 15:04:57 2022 +0800

    [improvement]Separate input and output parameters in ColumnPredicate 
(#10249)
    
    ```cpp
    for (uint16_t i = 0; i < *size; ++i) {
            // some code here
    }
    ```
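    Because `size` is passed as a pointer, `*size` is re-read on every test of the
    loop condition, which also prevents possible vectorization. Passing the input
    size by value and returning the new size avoids this.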
---
 be/src/olap/block_column_predicate.cpp             | 29 +++++-----
 be/src/olap/block_column_predicate.h               | 18 ++++---
 be/src/olap/bloom_filter_predicate.h               | 20 +++----
 be/src/olap/column_predicate.h                     |  4 +-
 be/src/olap/comparison_predicate.cpp               | 62 +++++++++++-----------
 be/src/olap/comparison_predicate.h                 |  3 +-
 be/src/olap/in_list_predicate.h                    | 22 ++++----
 be/src/olap/null_predicate.cpp                     |  9 ++--
 be/src/olap/null_predicate.h                       |  2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 33 ++++++------
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  4 +-
 11 files changed, 107 insertions(+), 99 deletions(-)

diff --git a/be/src/olap/block_column_predicate.cpp 
b/be/src/olap/block_column_predicate.cpp
index 5fce8beebe..fc35bd8de9 100644
--- a/be/src/olap/block_column_predicate.cpp
+++ b/be/src/olap/block_column_predicate.cpp
@@ -41,11 +41,11 @@ void SingleColumnBlockPredicate::evaluate_or(RowBlockV2* 
block, uint16_t selecte
     _predicate->evaluate_or(&column_block, block->selection_vector(), 
selected_size, flags);
 }
 
-void SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& block, 
uint16_t* sel,
-                                          uint16_t* selected_size) const {
+uint16_t SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& 
block, uint16_t* sel,
+                                              uint16_t selected_size) const {
     auto column_id = _predicate->column_id();
     auto& column = block[column_id];
-    _predicate->evaluate(*column, sel, selected_size);
+    return _predicate->evaluate(*column, sel, selected_size);
 }
 
 void SingleColumnBlockPredicate::evaluate_and(vectorized::MutableColumns& 
block, uint16_t* sel,
@@ -90,25 +90,25 @@ void OrBlockColumnPredicate::evaluate(RowBlockV2* block, 
uint16_t* selected_size
     }
 }
 
-void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, 
uint16_t* sel,
-                                      uint16_t* selected_size) const {
+uint16_t OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, 
uint16_t* sel,
+                                          uint16_t selected_size) const {
     if (num_of_column_predicate() == 1) {
-        _block_column_predicate_vec[0]->evaluate(block, sel, selected_size);
+        return _block_column_predicate_vec[0]->evaluate(block, sel, 
selected_size);
     } else {
-        bool ret_flags[*selected_size];
-        memset(ret_flags, false, *selected_size);
+        bool ret_flags[selected_size];
+        memset(ret_flags, false, selected_size);
         for (int i = 0; i < num_of_column_predicate(); ++i) {
             auto column_predicate = _block_column_predicate_vec[i];
-            column_predicate->evaluate_or(block, sel, *selected_size, 
ret_flags);
+            column_predicate->evaluate_or(block, sel, selected_size, 
ret_flags);
         }
 
         uint16_t new_size = 0;
-        for (int i = 0; i < *selected_size; ++i) {
+        for (int i = 0; i < selected_size; ++i) {
             if (ret_flags[i]) {
                 sel[new_size++] = sel[i];
             }
         }
-        *selected_size = new_size;
+        return new_size;
     }
 }
 
@@ -168,11 +168,12 @@ void AndBlockColumnPredicate::evaluate(RowBlockV2* block, 
uint16_t* selected_siz
     }
 }
 
-void AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, 
uint16_t* sel,
-                                       uint16_t* selected_size) const {
+uint16_t AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, 
uint16_t* sel,
+                                           uint16_t selected_size) const {
     for (auto block_column_predicate : _block_column_predicate_vec) {
-        block_column_predicate->evaluate(block, sel, selected_size);
+        selected_size = block_column_predicate->evaluate(block, sel, 
selected_size);
     }
+    return selected_size;
 }
 
 void AndBlockColumnPredicate::evaluate_and(RowBlockV2* block, uint16_t 
selected_size,
diff --git a/be/src/olap/block_column_predicate.h 
b/be/src/olap/block_column_predicate.h
index 4bd4d7baf8..219a92c3aa 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -43,8 +43,10 @@ public:
 
     virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 
0;
 
-    virtual void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
-                          uint16_t* selected_size) const {};
+    virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+                              uint16_t selected_size) const {
+        return selected_size;
+    }
     virtual void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
                               uint16_t selected_size, bool* flags) const {};
     virtual void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel,
@@ -66,8 +68,8 @@ public:
         column_id_set.insert(_predicate->column_id());
     };
 
-    void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
-                  uint16_t* selected_size) const override;
+    uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+                      uint16_t selected_size) const override;
     void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
                       bool* flags) const override;
     void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
@@ -115,8 +117,8 @@ public:
     void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags) 
const override;
     void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) 
const override;
 
-    void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
-                  uint16_t* selected_size) const override;
+    uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+                      uint16_t selected_size) const override;
     void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
                       bool* flags) const override;
     void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
@@ -135,8 +137,8 @@ public:
     // 2.Evaluate OR SEMANTICS in flags use 1 result to get proper select flags
     void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) 
const override;
 
-    void evaluate(vectorized::MutableColumns& block, uint16_t* sel,
-                  uint16_t* selected_size) const override;
+    uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
+                      uint16_t selected_size) const override;
     void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
                       bool* flags) const override;
     void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
diff --git a/be/src/olap/bloom_filter_predicate.h 
b/be/src/olap/bloom_filter_predicate.h
index eac480ccea..9e6901cbfb 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -65,7 +65,7 @@ public:
         return Status::OK();
     }
 
-    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) 
const override;
+    uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size) const override;
 
 private:
     std::shared_ptr<IBloomFilterFuncBase> _filter;
@@ -111,12 +111,12 @@ void BloomFilterColumnPredicate<T>::evaluate(ColumnBlock* 
block, uint16_t* sel,
 }
 
 template <PrimitiveType T>
-void BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,
-                                             uint16_t* size) const {
+uint16_t BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,
+                                                 uint16_t size) const {
     uint16_t new_size = 0;
     using FT = typename PredicatePrimitiveTypeTraits<T>::PredicateFieldType;
     if (!_enable_pred) {
-        return;
+        return size;
     }
     if (column.is_nullable()) {
         auto* nullable_col = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -127,7 +127,7 @@ void 
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
                     nullable_col->get_nested_column());
             const_cast<vectorized::ColumnDictI32*>(dict_col)
                     ->generate_hash_values_for_runtime_filter();
-            for (uint16_t i = 0; i < *size; i++) {
+            for (uint16_t i = 0; i < size; i++) {
                 uint16_t idx = sel[i];
                 sel[new_size] = idx;
                 new_size += (!null_map_data[idx]) &&
@@ -137,7 +137,7 @@ void 
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
             auto* pred_col = 
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(
                     nullable_col->get_nested_column());
             auto& pred_col_data = pred_col->get_data();
-            for (uint16_t i = 0; i < *size; i++) {
+            for (uint16_t i = 0; i < size; i++) {
                 uint16_t idx = sel[i];
                 sel[new_size] = idx;
                 const auto* cell_value = reinterpret_cast<const 
void*>(&(pred_col_data[idx]));
@@ -147,7 +147,7 @@ void 
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
     } else if (column.is_column_dictionary()) {
         auto* dict_col = 
vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
         
const_cast<vectorized::ColumnDictI32*>(dict_col)->generate_hash_values_for_runtime_filter();
-        for (uint16_t i = 0; i < *size; i++) {
+        for (uint16_t i = 0; i < size; i++) {
             uint16_t idx = sel[i];
             sel[new_size] = idx;
             new_size += 
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
@@ -156,7 +156,7 @@ void 
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
         auto* pred_col =
                 
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(column);
         auto& pred_col_data = pred_col->get_data();
-        for (uint16_t i = 0; i < *size; i++) {
+        for (uint16_t i = 0; i < size; i++) {
             uint16_t idx = sel[i];
             sel[new_size] = idx;
             const auto* cell_value = reinterpret_cast<const 
void*>(&(pred_col_data[idx]));
@@ -166,14 +166,14 @@ void 
BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, uint16
     // If the pass rate is very high, for example > 50%, then the bloomfilter 
is useless.
     // Some bloomfilter is useless, for example ssb 4.3, it consumes a lot of 
cpu but it is
     // useless.
-    _evaluated_rows += *size;
+    _evaluated_rows += size;
     _passed_rows += new_size;
     if (_evaluated_rows > config::bloom_filter_predicate_check_row_num) {
         if (_passed_rows / (_evaluated_rows * 1.0) > 0.5) {
             _enable_pred = false;
         }
     }
-    *size = new_size;
+    return new_size;
 }
 
 class BloomFilterColumnPredicateFactory {
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 7de6d1b6eb..86580875e6 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -73,7 +73,9 @@ public:
 
     // evaluate predicate on IColumn
     // a short circuit eval way
-    virtual void evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {};
+    virtual uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size) const {
+        return size;
+    };
     virtual void evaluate_and(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size,
                               bool* flags) const {};
     virtual void evaluate_or(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size,
diff --git a/be/src/olap/comparison_predicate.cpp 
b/be/src/olap/comparison_predicate.cpp
index 2223d1fe19..fc50c354fd 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -149,7 +149,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 // by this way, avoid redundant code
 #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)                   
                    \
     template <class T>                                                         
                    \
-    void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {    \
+    uint16_t CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size) const { \
         uint16_t new_size = 0;                                                 
                    \
         if (column.is_nullable()) {                                            
                    \
             auto* nullable_col =                                               
                    \
@@ -166,7 +166,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                     auto dict_code =                                           
                    \
                             IS_RANGE ? 
nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1)  \
                                      : nested_col_ptr->find_code(_value);      
                    \
-                    for (uint16_t i = 0; i < *size; i++) {                     
                    \
+                    for (uint16_t i = 0; i < size; i++) {                      
                    \
                         uint16_t idx = sel[i];                                 
                    \
                         sel[new_size] = idx;                                   
                    \
                         const auto& cell_value = data_array[idx];              
                    \
@@ -179,7 +179,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                         
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(      \
                                 nested_col);                                   
                    \
                 auto& data_array = nested_col_ptr->get_data();                 
                    \
-                for (uint16_t i = 0; i < *size; i++) {                         
                    \
+                for (uint16_t i = 0; i < size; i++) {                          
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
                     const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);          \
@@ -195,7 +195,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                 auto& data_array = dict_col.get_data();                        
                    \
                 auto dict_code = IS_RANGE ? 
dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1)    \
                                           : dict_col.find_code(_value);        
                    \
-                for (uint16_t i = 0; i < *size; ++i) {                         
                    \
+                for (uint16_t i = 0; i < size; ++i) {                          
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
                     const auto& cell_value = data_array[idx];                  
                    \
@@ -206,7 +206,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
         } else {                                                               
                    \
             auto& pred_column_ref = 
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
             auto& data_array = pred_column_ref.get_data();                     
                    \
-            for (uint16_t i = 0; i < *size; i++) {                             
                    \
+            for (uint16_t i = 0; i < size; i++) {                              
                    \
                 uint16_t idx = sel[i];                                         
                    \
                 sel[new_size] = idx;                                           
                    \
                 const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);              \
@@ -214,7 +214,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                 new_size += _opposite ? !ret : ret;                            
                    \
             }                                                                  
                    \
         }                                                                      
                    \
-        *size = new_size;                                                      
                    \
+        return new_size;                                                       
                    \
     }
 
 COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
@@ -609,31 +609,31 @@ 
COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(LessEqualPredicate)
 COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterPredicate)
 COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterEqualPredicate)
 
-#define COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(CLASS)                     
            \
-    template void CLASS<int8_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,      \
-                                          uint16_t* size) const;               
            \
-    template void CLASS<int16_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
-                                           uint16_t* size) const;              
            \
-    template void CLASS<int32_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
-                                           uint16_t* size) const;              
            \
-    template void CLASS<int64_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
-                                           uint16_t* size) const;              
            \
-    template void CLASS<int128_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
-                                            uint16_t* size) const;             
            \
-    template void CLASS<float>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,       \
-                                         uint16_t* size) const;                
            \
-    template void CLASS<double>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,      \
-                                          uint16_t* size) const;               
            \
-    template void CLASS<decimal12_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel, \
-                                               uint16_t* size) const;          
            \
-    template void CLASS<StringValue>::evaluate(vectorized::IColumn& column, 
uint16_t* sel, \
-                                               uint16_t* size) const;          
            \
-    template void CLASS<uint24_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
-                                            uint16_t* size) const;             
            \
-    template void CLASS<uint64_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
-                                            uint16_t* size) const;             
            \
-    template void CLASS<bool>::evaluate(vectorized::IColumn& column, uint16_t* 
sel,        \
-                                        uint16_t* size) const;
+#define COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(CLASS)                     
                \
+    template uint16_t CLASS<int8_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,      \
+                                              uint16_t size) const;            
                \
+    template uint16_t CLASS<int16_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
+                                               uint16_t size) const;           
                \
+    template uint16_t CLASS<int32_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
+                                               uint16_t size) const;           
                \
+    template uint16_t CLASS<int64_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,     \
+                                               uint16_t size) const;           
                \
+    template uint16_t CLASS<int128_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
+                                                uint16_t size) const;          
                \
+    template uint16_t CLASS<float>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,       \
+                                             uint16_t size) const;             
                \
+    template uint16_t CLASS<double>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,      \
+                                              uint16_t size) const;            
                \
+    template uint16_t CLASS<decimal12_t>::evaluate(vectorized::IColumn& 
column, uint16_t* sel, \
+                                                   uint16_t size) const;       
                \
+    template uint16_t CLASS<StringValue>::evaluate(vectorized::IColumn& 
column, uint16_t* sel, \
+                                                   uint16_t size) const;       
                \
+    template uint16_t CLASS<uint24_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
+                                                uint16_t size) const;          
                \
+    template uint16_t CLASS<uint64_t>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,    \
+                                                uint16_t size) const;          
                \
+    template uint16_t CLASS<bool>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,        \
+                                            uint16_t size) const;
 
 COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(EqualPredicate)
 COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(NotEqualPredicate)
diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index 54ddd11e50..40b93fb888 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -40,7 +40,8 @@ class VectorizedRowBatch;
         virtual Status evaluate(const Schema& schema,                          
                    \
                                 const std::vector<BitmapIndexIterator*>& 
iterators,                \
                                 uint32_t num_rows, roaring::Roaring* roaring) 
const override;      \
-        void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* 
size) const override;  \
+        uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel,          
                    \
+                          uint16_t size) const override;                       
                    \
         void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size,               \
                           bool* flags) const override;                         
                    \
         void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size,                \
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index c8574092e4..e39686abd4 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -201,7 +201,7 @@ public:
         return Status::OK();
     }
 
-    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) 
const override {
+    uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size) const override {
         if (column.is_nullable()) {
             auto* nullable_col =
                     
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -211,15 +211,15 @@ public:
             auto& nested_col = nullable_col->get_nested_column();
 
             if (_opposite) {
-                _base_evaluate<true, true>(&nested_col, &null_bitmap, sel, 
size);
+                return _base_evaluate<true, true>(&nested_col, &null_bitmap, 
sel, size);
             } else {
-                _base_evaluate<true, false>(&nested_col, &null_bitmap, sel, 
size);
+                return _base_evaluate<true, false>(&nested_col, &null_bitmap, 
sel, size);
             }
         } else {
             if (_opposite) {
-                _base_evaluate<false, true>(&column, nullptr, sel, size);
+                return _base_evaluate<false, true>(&column, nullptr, sel, 
size);
             } else {
-                _base_evaluate<false, false>(&column, nullptr, sel, size);
+                return _base_evaluate<false, false>(&column, nullptr, sel, 
size);
             }
         }
     }
@@ -285,9 +285,9 @@ private:
     }
 
     template <bool is_nullable, bool is_opposite>
-    void _base_evaluate(const vectorized::IColumn* column,
-                        const vectorized::PaddedPODArray<vectorized::UInt8>* 
null_map,
-                        uint16_t* sel, uint16_t* size) const {
+    uint16_t _base_evaluate(const vectorized::IColumn* column,
+                            const 
vectorized::PaddedPODArray<vectorized::UInt8>* null_map,
+                            uint16_t* sel, uint16_t size) const {
         uint16_t new_size = 0;
 
         if (column->is_column_dictionary()) {
@@ -297,7 +297,7 @@ private:
                 auto& data_array = nested_col_ptr->get_data();
                 nested_col_ptr->find_codes(_values, _value_in_dict_flags);
 
-                for (uint16_t i = 0; i < *size; i++) {
+                for (uint16_t i = 0; i < size; i++) {
                     uint16_t idx = sel[i];
                     if constexpr (is_nullable) {
                         if ((*null_map)[idx]) {
@@ -326,7 +326,7 @@ private:
                     
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
             auto& data_array = nested_col_ptr->get_data();
 
-            for (uint16_t i = 0; i < *size; i++) {
+            for (uint16_t i = 0; i < size; i++) {
                 uint16_t idx = sel[i];
                 if constexpr (is_nullable) {
                     if ((*null_map)[idx]) {
@@ -351,7 +351,7 @@ private:
             }
         }
 
-        *size = new_size;
+        return new_size;
     }
 
     phmap::flat_hash_set<T> _values;
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index 160e49ebe8..3b94cf6b8f 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -125,19 +125,20 @@ Status NullPredicate::evaluate(const Schema& schema,
     return Status::OK();
 }
 
-void NullPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {
+uint16_t NullPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size) const {
     uint16_t new_size = 0;
     if (auto* nullable = check_and_get_column<ColumnNullable>(column)) {
         auto& null_map = nullable->get_null_map_data();
-        for (uint16_t i = 0; i < *size; ++i) {
+        for (uint16_t i = 0; i < size; ++i) {
             uint16_t idx = sel[i];
             sel[new_size] = idx;
             new_size += (null_map[idx] == _is_null);
         }
-        *size = new_size;
+        return new_size;
     } else {
-        if (_is_null) *size = 0;
+        if (_is_null) return 0;
     }
+    return size;
 }
 
 void NullPredicate::evaluate_or(IColumn& column, uint16_t* sel, uint16_t size, 
bool* flags) const {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 4632b6055f..294ce339b5 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -44,7 +44,7 @@ public:
     virtual Status evaluate(const Schema& schema, const 
vector<BitmapIndexIterator*>& iterators,
                             uint32_t num_rows, roaring::Roaring* roaring) 
const override;
 
-    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) 
const override;
+    uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size) const override;
 
     void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
                      bool* flags) const override;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 6d3bfcd5ec..b9d511b05a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -856,14 +856,14 @@ Status SegmentIterator::_read_columns_by_index(uint32_t 
nrows_read_limit, uint32
     return Status::OK();
 }
 
-void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* 
sel_rowid_idx,
-                                                        uint16_t& 
selected_size) {
+uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* 
sel_rowid_idx,
+                                                            uint16_t 
selected_size) {
     SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
     if (!_is_need_vec_eval) {
         for (uint32_t i = 0; i < selected_size; ++i) {
             sel_rowid_idx[i] = i;
         }
-        return;
+        return selected_size;
     }
 
     uint16_t original_size = selected_size;
@@ -894,17 +894,17 @@ void 
SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
     }
 
     _opts.stats->rows_vec_cond_filtered += original_size - new_size;
-    selected_size = new_size;
+    return new_size;
 }
 
-void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* 
vec_sel_rowid_idx,
-                                                        uint16_t* 
selected_size_ptr) {
+uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* 
vec_sel_rowid_idx,
+                                                            uint16_t 
selected_size) {
     SCOPED_RAW_TIMER(&_opts.stats->short_cond_ns);
     if (!_is_need_short_eval) {
-        return;
+        return selected_size;
     }
 
-    uint16_t original_size = *selected_size_ptr;
+    uint16_t original_size = selected_size;
     for (auto predicate : _short_cir_eval_predicate) {
         auto column_id = predicate->column_id();
         auto& short_cir_column = _current_return_columns[column_id];
@@ -914,15 +914,16 @@ void 
SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
             predicate->type() == PredicateType::GT || predicate->type() == 
PredicateType::GE) {
             col_ptr->convert_dict_codes_if_necessary();
         }
-        predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, 
selected_size_ptr);
+        selected_size = predicate->evaluate(*short_cir_column, 
vec_sel_rowid_idx, selected_size);
     }
-    _opts.stats->rows_vec_cond_filtered += original_size - *selected_size_ptr;
+    _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
 
     // evaluate delete condition
-    original_size = *selected_size_ptr;
-    _opts.delete_condition_predicates->evaluate(_current_return_columns, 
vec_sel_rowid_idx,
-                                                selected_size_ptr);
-    _opts.stats->rows_vec_del_cond_filtered += original_size - 
*selected_size_ptr;
+    original_size = selected_size;
+    selected_size = 
_opts.delete_condition_predicates->evaluate(_current_return_columns,
+                                                                
vec_sel_rowid_idx, selected_size);
+    _opts.stats->rows_vec_del_cond_filtered += original_size - selected_size;
+    return selected_size;
 }
 
 void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& 
read_column_ids,
@@ -1006,13 +1007,13 @@ Status SegmentIterator::next_batch(vectorized::Block* 
block) {
         uint16_t sel_rowid_idx[selected_size];
 
         // step 1: evaluate vectorization predicate
-        _evaluate_vectorization_predicate(sel_rowid_idx, selected_size);
+        selected_size = _evaluate_vectorization_predicate(sel_rowid_idx, 
selected_size);
 
         // step 2: evaluate short ciruit predicate
         // todo(wb) research whether need to read short predicate after 
vectorization evaluation
         //          to reduce cost of read short circuit columns.
         //          In SSB test, it make no difference; So need more scenarios 
to test
-        _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
+        selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx, 
selected_size);
 
         if (!_lazy_materialization_read) {
             Status ret = _output_column_by_sel_idx(block, 
_first_read_column_ids, sel_rowid_idx,
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 0ce9975456..04c68699a0 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -99,8 +99,8 @@ private:
                                   bool set_block_rowid);
     void _init_current_block(vectorized::Block* block,
                              std::vector<vectorized::MutableColumnPtr>& 
non_pred_vector);
-    void _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& 
selected_size);
-    void _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t* 
selected_size);
+    uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, 
uint16_t selected_size);
+    uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, 
uint16_t selected_size);
     void _output_non_pred_columns(vectorized::Block* block);
     void _read_columns_by_rowids(std::vector<ColumnId>& read_column_ids,
                                  std::vector<rowid_t>& rowid_vector, uint16_t* 
sel_rowid_idx,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to