This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 3659b8bb90a97761d6d72e938113bdff86cc846e
Author: ZenoYang <[email protected]>
AuthorDate: Fri Apr 29 11:45:04 2022 +0800

    [fix](vectorized) Query get wrong result when ColumnDict concurrent 
predicate eval (#9270)
---
 be/src/olap/column_predicate.h                     |  4 +-
 be/src/olap/comparison_predicate.cpp               | 96 ++++------------------
 be/src/olap/comparison_predicate.h                 |  3 -
 be/src/olap/in_list_predicate.cpp                  | 37 +--------
 be/src/olap/in_list_predicate.h                    |  5 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  3 +-
 6 files changed, 22 insertions(+), 126 deletions(-)

diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 45f8fcfa7f..56f817d9f3 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -42,7 +42,7 @@ enum class PredicateType {
     GT = 5,
     GE = 6,
     IN_LIST = 7,
-    NO_IN_LIST = 8,
+    NOT_IN_LIST = 8,
     IS_NULL = 9,
     NOT_IS_NULL = 10,
     BF = 11, // BloomFilter
@@ -86,8 +86,6 @@ public:
     virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, 
bool* flags) const {};
     uint32_t column_id() const { return _column_id; }
 
-    virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { }
-
 protected:
     uint32_t _column_id;
     bool _opposite;
diff --git a/be/src/olap/comparison_predicate.cpp 
b/be/src/olap/comparison_predicate.cpp
index 57549c349c..45a89f92ad 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -147,7 +147,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 
 // todo(zeno) define interface in IColumn to simplify code
-#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP)                             
                    \
+#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)                   
                    \
     template <class T>                                                         
                    \
     void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {    \
         uint16_t new_size = 0;                                                 
                    \
@@ -163,11 +163,14 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                     auto* nested_col_ptr = vectorized::check_and_get_column<   
                    \
                             
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
                     auto& data_array = nested_col_ptr->get_data();             
                    \
+                    auto dict_code =                                           
                    \
+                            IS_RANGE ? 
nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1)  \
+                                     : nested_col_ptr->find_code(_value);      
                    \
                     for (uint16_t i = 0; i < *size; i++) {                     
                    \
                         uint16_t idx = sel[i];                                 
                    \
                         sel[new_size] = idx;                                   
                    \
                         const auto& cell_value = data_array[idx];              
                    \
-                        bool ret = !null_bitmap[idx] && (cell_value OP 
_dict_code);                \
+                        bool ret = !null_bitmap[idx] && (cell_value OP 
dict_code);                 \
                         new_size += _opposite ? !ret : ret;                    
                    \
                     }                                                          
                    \
                 }                                                              
                    \
@@ -184,20 +187,20 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
             }                                                                  
                    \
-            *size = new_size;                                                  
                    \
         } else if (column.is_column_dictionary()) {                            
                    \
             if constexpr (std::is_same_v<T, StringValue>) {                    
                    \
                 auto& dict_col =                                               
                    \
                         
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
                 auto& data_array = dict_col.get_data();                        
                    \
+                auto dict_code = IS_RANGE ? 
dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1)    \
+                                          : dict_col.find_code(_value);        
                    \
                 for (uint16_t i = 0; i < *size; ++i) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
                     const auto& cell_value = data_array[idx];                  
                    \
-                    bool ret = cell_value OP _dict_code;                       
                    \
+                    bool ret = cell_value OP dict_code;                        
                   \
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
-                *size = new_size;                                              
                    \
             }                                                                  
                    \
         } else {                                                               
                    \
             auto& pred_column_ref =                                            
                    \
@@ -210,17 +213,17 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                 auto ret = cell_value OP _value;                               
                    \
                 new_size += _opposite ? !ret : ret;                            
                    \
             }                                                                  
                    \
-            *size = new_size;                                                  
                    \
         }                                                                      
                    \
+        *size = new_size;                                                      
                    \
     }
 
 
-COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==)
-COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=)
-COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <)
-COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=)
-COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >)
-COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=)
+COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
+COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false)
+COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true)
+COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
 
 #define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP)                         
                \
     template <class T>                                                         
                \
@@ -502,65 +505,6 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=)
 COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
 
-
-#define COMPARISON_PRED_SET_DICT_CODE(CLASS)                                   
                \
-    template <class T>                                                         
                \
-    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
-        if (_dict_code_inited) {                                               
                \
-            return;                                                            
                \
-        }                                                                      
                \
-        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
-            auto* col_ptr = column.get_ptr().get();                            
                \
-            if (column.is_nullable()) {                                        
                \
-                auto nullable_col =                                            
                \
-                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
-                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
-            }                                                                  
                \
-            if (col_ptr->is_column_dictionary()) {                             
                \
-                auto& dict_col =                                               
                \
-                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
-                                *col_ptr);                                     
                \
-                _dict_code = dict_col.find_code(_value);                       
                \
-                _dict_code_inited = true;                                      
                \
-            }                                                                  
                \
-        }                                                                      
                \
-    }
-
-COMPARISON_PRED_SET_DICT_CODE(EqualPredicate)
-COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate)
-
-// If 1 OP 0 returns true, it means the predicate is > or >=
-// If 1 OP 1 returns true, it means the predicate is >= or <=
-// by this way, avoid redundant code
-#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP)                         
                \
-    template <class T>                                                         
                \
-    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
-        if (_dict_code_inited) {                                               
                \
-            return;                                                            
                \
-        }                                                                      
                \
-        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
-            auto* col_ptr = column.get_ptr().get();                            
                \
-            if (column.is_nullable()) {                                        
                \
-                auto nullable_col =                                            
                \
-                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
-                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
-            }                                                                  
                \
-                                                                               
                \
-            if (col_ptr->is_column_dictionary()) {                             
                \
-                auto& dict_col =                                               
                \
-                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
-                                *col_ptr);                                     
                \
-                _dict_code = dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 
1);              \
-                _dict_code_inited = true;                                      
                \
-            }                                                                  
                \
-        }                                                                      
                \
-    }
-
-RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <)
-RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=)
-RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >)
-RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=)
-
 #define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS)                         
                \
     template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value, 
bool opposite);     \
     template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value, 
bool opposite);   \
@@ -745,14 +689,4 @@ 
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate)
 COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate)
 COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate)
 
-#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \
-template void 
CLASS<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
-
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate)
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate)
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate)
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate)
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate)
-COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate)
-
 } //namespace doris
diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index 3df31c370d..7e7f718a76 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -47,11 +47,8 @@ class VectorizedRowBatch;
         void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size,                \
                          bool* flags) const override;                          
                    \
         void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* 
flags) const override; \
-        void set_dict_code_if_necessary(vectorized::IColumn& column) override; 
                    \
     private:                                                                   
                    \
         T _value;                                                              
                    \
-        bool _dict_code_inited = false;                                        
                    \
-        int32_t _dict_code;                                                    
                    \
     };
 
 COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ)
diff --git a/be/src/olap/in_list_predicate.cpp 
b/be/src/olap/in_list_predicate.cpp
index 0401673926..9b78a8705f 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -134,12 +134,13 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                     auto* nested_col_ptr = vectorized::check_and_get_column<   
                    \
                             
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
                     auto& data_array = nested_col_ptr->get_data();             
                    \
+                    auto dict_codes = nested_col_ptr->find_codes(_values);     
                    \
                     for (uint16_t i = 0; i < *size; i++) {                     
                    \
                         uint16_t idx = sel[i];                                 
                    \
                         sel[new_size] = idx;                                   
                    \
                         const auto& cell_value = data_array[idx];              
                    \
                         bool ret = !null_bitmap[idx]                           
                    \
-                                   && (_dict_codes.find(cell_value) OP 
_dict_codes.end());         \
+                                   && (dict_codes.find(cell_value) OP 
dict_codes.end());           \
                         new_size += _opposite ? !ret : ret;                    
                    \
                     }                                                          
                    \
                 }                                                              
                    \
@@ -155,18 +156,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
             }                                                                  
                    \
-            *size = new_size;                                                  
                    \
         } else if (column.is_column_dictionary()) {                            
                    \
             if constexpr (std::is_same_v<T, StringValue>) {                    
                    \
                 auto& dict_col =                                               
                    \
                         
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(        \
                                 column);                                       
                    \
                 auto& data_array = dict_col.get_data();                        
                    \
+                auto dict_codes = dict_col.find_codes(_values);                
                    \
                 for (uint16_t i = 0; i < *size; i++) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
                     const auto& cell_value = data_array[idx];                  
                    \
-                    auto result = (_dict_codes.find(cell_value) OP 
_dict_codes.end());             \
+                    auto result = (dict_codes.find(cell_value) OP 
dict_codes.end());               \
                     new_size += _opposite ? !result : result;                  
                    \
                 }                                                              
                    \
             }                                                                  
                    \
@@ -282,32 +283,6 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, 
==)
 IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
 IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
 
-#define IN_LIST_PRED_SET_DICT_CODE(CLASS)                                      
                \
-    template <class T>                                                         
                \
-    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
-        if (_dict_code_inited) {                                               
                \
-            return;                                                            
                \
-        }                                                                      
                \
-        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
-            auto* col_ptr = column.get_ptr().get();                            
                \
-            if (column.is_nullable()) {                                        
                \
-                auto nullable_col =                                            
                \
-                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
-                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
-            }                                                                  
                \
-            if (col_ptr->is_column_dictionary()) {                             
                \
-                auto& dict_col =                                               
                \
-                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
-                                *col_ptr);                                     
                \
-                _dict_codes = dict_col.find_codes(_values);                    
                \
-                _dict_code_inited = true;                                      
                \
-            }                                                                  
                \
-        }                                                                      
                \
-    }
-
-IN_LIST_PRED_SET_DICT_CODE(InListPredicate)
-IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate)
-
 #define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS)                            
                    \
     template CLASS<int8_t>::CLASS(uint32_t column_id, 
phmap::flat_hash_set<int8_t>&& values,       \
                                   bool opposite);                              
                    \
@@ -415,8 +390,4 @@ 
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
 IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
 IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
 
-template void 
InListPredicate<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& 
column);
-template void NotInListPredicate<StringValue>::set_dict_code_if_necessary(
-        vectorized::IColumn& column);
-
 } //namespace doris
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 089ee84a06..33682b6824 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -96,15 +96,12 @@ class VectorizedRowBatch;
         void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* 
size) const override; \
         void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size, bool* flags) const override {} \
         void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size, bool* flags) const override {} \
-        void set_dict_code_if_necessary(vectorized::IColumn& column) override; 
                   \
     private:                                                                   
                   \
         phmap::flat_hash_set<T> _values;                                       
                   \
-        bool _dict_code_inited = false;                                        
                   \
-        phmap::flat_hash_set<int32_t> _dict_codes;                             
                   \
     };
 
 IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
-IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST)
+IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST)
 
 } //namespace doris
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 52cc584fdb..9484601f12 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -634,7 +634,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
             if (type == OLAP_FIELD_TYPE_VARCHAR || type == 
OLAP_FIELD_TYPE_CHAR ||
                 type == OLAP_FIELD_TYPE_STRING || predicate->type() == 
PredicateType::BF ||
                 predicate->type() == PredicateType::IN_LIST ||
-                predicate->type() == PredicateType::NO_IN_LIST) {
+                predicate->type() == PredicateType::NOT_IN_LIST) {
                 short_cir_pred_col_id_set.insert(cid);
                 _short_cir_eval_predicate.push_back(predicate);
                 _is_all_column_basic_type = false;
@@ -867,7 +867,6 @@ void 
SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
             predicate->type() == PredicateType::GT || predicate->type() == 
PredicateType::GE) {
             col_ptr->convert_dict_codes_if_necessary();
         }
-        predicate->set_dict_code_if_necessary(*short_cir_column);
         predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, 
selected_size_ptr);
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to