This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a5298d6  [Performance Improve] Push Down _conjunctf of 'not in' and 
'!=' to Storage Engine. (#5207)
a5298d6 is described below

commit a5298d617daac8e4bf3ae473d8bb195f67ad15dd
Author: HappenLee <[email protected]>
AuthorDate: Sat Jan 23 21:07:01 2021 +0800

    [Performance Improve] Push Down _conjunctf of 'not in' and '!=' to Storage 
Engine. (#5207)
---
 be/src/exec/olap_common.h                       |  65 +++++++++---
 be/src/exec/olap_scan_node.cpp                  | 135 +++++++++++++++++++-----
 be/src/exec/olap_scan_node.h                    |   8 +-
 be/src/olap/olap_cond.cpp                       |   5 +-
 be/src/olap/reader.cpp                          |  73 ++++++++++---
 be/src/olap/rowset/segment_v2/column_reader.cpp |   1 +
 be/test/exec/olap_common_test.cpp               |  12 ++-
 7 files changed, 234 insertions(+), 65 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index b28ddc5..ca71dba 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -62,7 +62,10 @@ public:
     ColumnValueRange(std::string col_name, PrimitiveType type, const T& min, 
const T& max, bool contain_null);
 
     // should add fixed value before add range
-    Status add_fixed_value(T value);
+    Status add_fixed_value(const T& value);
+
+    // should remove fixed value after add fixed value
+    void remove_fixed_value(const T& value);
 
     Status add_range(SQLFilterOp op, T value);
 
@@ -109,24 +112,16 @@ public:
 
     PrimitiveType type() const { return _column_type; }
 
+    const std::string& column_name() const { return _column_name; }
+
     bool contain_null() const { return _contain_null; }
 
     size_t get_fixed_value_size() const { return _fixed_values.size(); }
 
-    void to_olap_filter(std::list<TCondition>& filters) {
+    void to_olap_filter(std::vector<TCondition>& filters) {
         if (is_fixed_value_range()) {
             // 1. convert to in filter condition
-            TCondition condition;
-            condition.__set_column_name(_column_name);
-            condition.__set_condition_op("*=");
-
-            for (const auto& value : _fixed_values) {
-                condition.condition_values.push_back(cast_to_string(value));
-            }
-
-            if (condition.condition_values.size() != 0) {
-                filters.push_back(condition);
-            }
+            to_in_condition(filters, true);
         } else if (_low_value < _high_value) {
             // 2. convert to min max filter condition
             TCondition null_pred;
@@ -178,6 +173,20 @@ public:
         }
     }
 
+    void to_in_condition(std::vector<TCondition>& filters, bool is_in = true) {
+        TCondition condition;
+        condition.__set_column_name(_column_name);
+        condition.__set_condition_op(is_in ? "*=" : "!*=");
+
+        for (const auto& value : _fixed_values) {
+            condition.condition_values.push_back(cast_to_string(value));
+        }
+
+        if (condition.condition_values.size() != 0) {
+            filters.push_back(condition);
+        }
+    }
+
     void set_whole_value_range() {
         _fixed_values.clear();
         _low_value = TYPE_MIN;
@@ -207,8 +216,20 @@ public:
         _contain_null = contain_null;
     };
 
+    static void add_fixed_value_range(ColumnValueRange<T>& range, T* value) {
+        range.add_fixed_value(*value);
+    }
+
+    static void remove_fixed_value_range(ColumnValueRange<T>& range, T* value) 
{
+        range.remove_fixed_value(*value);
+    }
+
     static ColumnValueRange<T> create_empty_column_value_range(PrimitiveType 
type) {
-        return ColumnValueRange<T>("", type, TYPE_MAX, TYPE_MIN, false);
+        return ColumnValueRange<T>::create_empty_column_value_range("", type);
+    }
+
+    static ColumnValueRange<T> create_empty_column_value_range(const 
std::string& col_name, PrimitiveType type) {
+        return ColumnValueRange<T>(col_name, type, TYPE_MAX, TYPE_MIN, false);
     }
 
 protected:
@@ -323,17 +344,26 @@ ColumnValueRange<T>::ColumnValueRange(std::string 
col_name, PrimitiveType type,
           _contain_null(contain_null){}
 
 template <class T>
-Status ColumnValueRange<T>::add_fixed_value(T value) {
+Status ColumnValueRange<T>::add_fixed_value(const T& value) {
     if (INVALID_TYPE == _column_type) {
         return Status::InternalError("AddFixedValue failed, Invalid type");
     }
 
     _fixed_values.insert(value);
     _contain_null = false;
+
+    _high_value = TYPE_MIN;
+    _low_value = TYPE_MAX;
+
     return Status::OK();
 }
 
 template <class T>
+void ColumnValueRange<T>::remove_fixed_value(const T& value) {
+    _fixed_values.erase(value);
+}
+
+template <class T>
 bool ColumnValueRange<T>::is_fixed_value_range() const {
     return _fixed_values.size() != 0;
 }
@@ -480,7 +510,6 @@ Status ColumnValueRange<T>::add_range(SQLFilterOp op, T 
value) {
         _low_value = TYPE_MAX;
     } else {
         if (_high_value > _low_value) {
-
             switch (op) {
             case FILTER_LARGER: {
                 if (value >= _low_value) {
@@ -593,7 +622,7 @@ void ColumnValueRange<T>::intersection(ColumnValueRange<T>& 
range) {
     }
 
     std::set<T> result_values;
-    // 3. fixed_value intersection, fixex value range do not contain null
+    // 3. fixed_value intersection, fixed value range do not contain null
     if (is_fixed_value_range() || range.is_fixed_value_range()) {
         if (is_fixed_value_range() && range.is_fixed_value_range()) {
             set_intersection(_fixed_values.begin(), _fixed_values.end(), 
range._fixed_values.begin(),
@@ -621,6 +650,8 @@ void ColumnValueRange<T>::intersection(ColumnValueRange<T>& 
range) {
         if (!result_values.empty()) {
             _fixed_values = std::move(result_values);
             _contain_null = false;
+            _high_value = TYPE_MIN;
+            _low_value = TYPE_MAX;
         } else {
             set_empty_value_range();
         }
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index cde915e..1f1de3f 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -536,14 +536,12 @@ Status OlapScanNode::normalize_conjuncts() {
 }
 
 Status OlapScanNode::build_olap_filters() {
-    _olap_filter.clear();
-
     for (auto& iter : _column_value_ranges) {
         ToOlapFilterVisitor visitor;
-        boost::variant<std::list<TCondition>> filters;
+        boost::variant<std::vector<TCondition>> filters;
         boost::apply_visitor(visitor, iter.second, filters);
 
-        std::list<TCondition> new_filters = 
boost::get<std::list<TCondition>>(filters);
+        std::vector<TCondition> new_filters = 
boost::get<std::vector<TCondition>>(filters);
         if (new_filters.empty()) {
             continue;
         }
@@ -734,13 +732,16 @@ Status 
OlapScanNode::normalize_predicate(ColumnValueRange<T>& range, SlotDescrip
     // 1. Normalize InPredicate, add to ColumnValueRange
     RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range));
 
-    // 2. Normalize BinaryPredicate , add to ColumnValueRange
+    // 2. Normalize NotInPredicate, add to ColumnValueRange
+    RETURN_IF_ERROR(normalize_not_in_and_not_eq_predicate(slot, &range));
+
+    // 3. Normalize BinaryPredicate , add to ColumnValueRange
     RETURN_IF_ERROR(normalize_noneq_binary_predicate(slot, &range));
 
-    // 3. Check whether range is empty, set _eos
+    // 4. Check whether range is empty, set _eos
     if (range.is_empty_value_range()) _eos = true;
 
-    // 4. Add range to Column->ColumnValueRange map
+    // 5. Add range to Column->ColumnValueRange map
     _column_value_ranges[slot->col_name()] = range;
 
     return Status::OK();
@@ -758,11 +759,6 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) {
 
 
 bool OlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor *slot, 
doris::InPredicate* pred) {
-    if (pred->is_not_in()) {
-        // can not push down NOT IN predicate to storage engine
-        return false;
-    }
-
     if (Expr::type_without_cast(pred->get_child(0)) != 
TExprNodeType::SLOT_REF) {
         // not a slot ref(column)
         return false;
@@ -843,21 +839,17 @@ std::pair<bool, void*> 
OlapScanNode::should_push_down_eq_predicate(doris::SlotDe
     return result_pair;
 }
 
-template <typename T>
-Status OlapScanNode::insert_value_to_range(doris::ColumnValueRange<T>& 
temp_range, doris::PrimitiveType type, void *value) {
+template <typename T, typename ChangeFixedValueRangeFunc>
+Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, 
PrimitiveType type, void *value,
+                                              const ChangeFixedValueRangeFunc& 
func) {
     switch (type) {
-        case TYPE_TINYINT: {
-            int32_t v = *reinterpret_cast<int8_t*>(value);
-            temp_range.add_fixed_value(*reinterpret_cast<T*>(&v));
-            break;
-        }
         case TYPE_DATE: {
             DateTimeValue date_value =
                     *reinterpret_cast<DateTimeValue*>(value);
             // There is must return empty data in olap_scan_node,
             // Because data value loss accuracy
             if (!date_value.check_loss_accuracy_cast_to_date()) {
-                temp_range.add_fixed_value(*reinterpret_cast<T*>(&date_value));
+                func(temp_range, reinterpret_cast<T*>(&date_value));
             }
             break;
         }
@@ -867,16 +859,17 @@ Status 
OlapScanNode::insert_value_to_range(doris::ColumnValueRange<T>& temp_rang
         case TYPE_VARCHAR:
         case TYPE_HLL:
         case TYPE_DATETIME:
+        case TYPE_TINYINT:
         case TYPE_SMALLINT:
         case TYPE_INT:
         case TYPE_BIGINT:
         case TYPE_LARGEINT: {
-            temp_range.add_fixed_value(*reinterpret_cast<T*>(value));
+            func(temp_range, reinterpret_cast<T*>(value));
             break;
         }
         case TYPE_BOOLEAN: {
             bool v = *reinterpret_cast<bool*>(value);
-            temp_range.add_fixed_value(*reinterpret_cast<T*>(&v));
+            func(temp_range, reinterpret_cast<T*>(&v));
             break;
         }
         default: {
@@ -916,7 +909,8 @@ Status 
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
                     continue;
                 }
                 auto value = const_cast<void*>(iter->get_value());
-                RETURN_IF_ERROR(insert_value_to_range(temp_range, 
slot->type().type, value));
+                RETURN_IF_ERROR(change_fixed_value_range(temp_range, 
slot->type().type, value,
+                        ColumnValueRange<T>::add_fixed_value_range));
                 iter->next();
             }
 
@@ -925,8 +919,7 @@ Status 
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
             }
             range->intersection(temp_range);
         } // end of handle in predicate
-
-            // 2. Normalize eq conjuncts like 'where col = value'
+        // 2. Normalize eq conjuncts like 'where col = value'
         else if (TExprNodeType::BINARY_PRED == 
_conjunct_ctxs[conj_idx]->root()->node_type() &&
                  FILTER_IN == 
to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) {
             Expr* pred = _conjunct_ctxs[conj_idx]->root();
@@ -944,7 +937,8 @@ Status 
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
                 auto value = result_pair.second;
                 // where A = NULL should return empty result set
                 if (value != nullptr) {
-                    RETURN_IF_ERROR(insert_value_to_range(temp_range, 
slot->type().type, value));
+                    RETURN_IF_ERROR(change_fixed_value_range(temp_range, 
slot->type().type, value,
+                            ColumnValueRange<T>::add_fixed_value_range));
                 }
 
                 if (is_key_column(slot->col_name())) {
@@ -965,6 +959,95 @@ Status 
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
     return Status::OK();
 }
 
+// Construct the ColumnValueRange for one specified column
+// It will only handle the NotInPredicate and not eq BinaryPredicate in 
_conjunct_ctxs.
+// It will try to push down as many conditions of that column as possible,
+// but if the number of conditions exceeds the limit, none of the conditions will 
be pushed down.
+template <class T>
+Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* 
slot,
+                                                   ColumnValueRange<T>* range) 
{
+    // If the range of the slot is already a fixed value range, remove the values from its fixed value set 
of the column value range;
+    // otherwise add the values to not_in_range and push down the predicate directly
+    bool is_fixed_range = range->is_fixed_value_range();
+    auto not_in_range = 
ColumnValueRange<T>::create_empty_column_value_range(range->column_name(), 
range->type());
+
+    std::vector<uint32_t> filter_conjuncts_index;
+    for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
+        // 1. Normalize not-in conjuncts like 'where col not in (v1, v2, v3)'
+        if (TExprOpcode::FILTER_NOT_IN == 
_conjunct_ctxs[conj_idx]->root()->op()) {
+            InPredicate* pred = 
dynamic_cast<InPredicate*>(_conjunct_ctxs[conj_idx]->root());
+            if (!should_push_down_in_predicate(slot, pred)) {
+                continue;
+            }
+
+            // begin to push InPredicate value into ColumnValueRange
+            auto iter = pred->hybrid_set()->begin();
+            while (iter->has_next()) {
+                // column not in (NULL) is always true
+                if (NULL == iter->get_value()) {
+                    continue;
+                }
+                auto value = const_cast<void*>(iter->get_value());
+                if (is_fixed_range) {
+                    RETURN_IF_ERROR(change_fixed_value_range(*range, 
slot->type().type, value,
+                            ColumnValueRange<T>::remove_fixed_value_range));
+                } else {
+                    RETURN_IF_ERROR(change_fixed_value_range(not_in_range, 
slot->type().type, value,
+                                                             
ColumnValueRange<T>::add_fixed_value_range));
+                }
+                iter->next();
+            }
+
+            // only where a in ('a', 'b', NULL) contain NULL will
+            // clear temp_range to whole range, no need do intersection
+            if (is_key_column(slot->col_name())) {
+                filter_conjuncts_index.emplace_back(conj_idx);
+            }
+        } // end of handle not in predicate
+
+        // 2. Normalize not-eq conjuncts like 'where col != value'
+        if (TExprNodeType::BINARY_PRED == 
_conjunct_ctxs[conj_idx]->root()->node_type() &&
+            FILTER_NOT_IN == 
to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) {
+            Expr* pred = _conjunct_ctxs[conj_idx]->root();
+            DCHECK(pred->get_num_children() == 2);
+
+            for (int child_idx = 0; child_idx < 2; ++child_idx) {
+                // TODO: should use C++17 structured bindings to refactor 
this code in the future:
+                // 'auto [should_push_down, value] = 
should_push_down_eq_predicate(slot, pred, conj_idx, child_idx);'
+                // to make the code tidier and more readable
+                auto result_pair = should_push_down_eq_predicate(slot, pred, 
conj_idx, child_idx);
+                if (!result_pair.first) {
+                    continue;
+                }
+                auto value = result_pair.second;
+
+                if (is_fixed_range) {
+                    RETURN_IF_ERROR(change_fixed_value_range(*range, 
slot->type().type, value,
+                                                                     
ColumnValueRange<T>::remove_fixed_value_range));
+                } else {
+                    RETURN_IF_ERROR(change_fixed_value_range(not_in_range, 
slot->type().type, value,
+                                                                     
ColumnValueRange<T>::add_fixed_value_range));
+                }
+
+                if (is_key_column(slot->col_name())) {
+                    filter_conjuncts_index.emplace_back(conj_idx);
+                }
+            } // end for each binary predicate child
+        } // end of handling not-eq binary predicate
+    }
+
+    // exceed limit, no conditions will be pushed down to storage engine.
+    if (is_fixed_range || not_in_range.get_fixed_value_size() <= 
_max_pushdown_conditions_per_column) {
+        if (!is_fixed_range) {
+            // push down not in condition to storage engine
+            not_in_range.to_in_condition(_olap_filter, false);
+        }
+        std::copy(filter_conjuncts_index.cbegin(), 
filter_conjuncts_index.cend(),
+                  std::inserter(_pushed_conjuncts_index, 
_pushed_conjuncts_index.begin()));
+    }
+    return Status::OK();
+}
+
 template <typename T>
 bool OlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* 
slot,
                 const std::string& is_null_str, ColumnValueRange<T>* range) {
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 728e088..34d80fd 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -154,6 +154,9 @@ protected:
     Status normalize_in_and_eq_predicate(SlotDescriptor* slot, 
ColumnValueRange<T>* range);
 
     template <class T>
+    Status normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot, 
ColumnValueRange<T>* range);
+
+    template <class T>
     Status normalize_noneq_binary_predicate(SlotDescriptor* slot, 
ColumnValueRange<T>* range);
 
     template <typename T>
@@ -178,8 +181,9 @@ private:
 
     std::pair<bool, void*> should_push_down_eq_predicate(SlotDescriptor* slot, 
Expr* pred, int conj_idx, int child_idx);
 
-    template <typename T>
-    static Status insert_value_to_range(ColumnValueRange<T>& range, 
PrimitiveType type, void* value);
+    template <typename T, typename ChangeFixedValueRangeFunc>
+    static Status change_fixed_value_range(ColumnValueRange <T> &range, 
PrimitiveType type, void *value,
+                                               const 
ChangeFixedValueRangeFunc& func);
 
     friend class OlapScanner;
 
diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp
index 231da7b..c9184c3 100644
--- a/be/src/olap/olap_cond.cpp
+++ b/be/src/olap/olap_cond.cpp
@@ -230,7 +230,7 @@ bool Cond::eval(const std::pair<WrapperField*, 
WrapperField*>& statistic) const
                operand_field->cmp(statistic.second) <= 0;
     }
     case OP_NE: {
-        return operand_field->cmp(statistic.first) < 0 || 
operand_field->cmp(statistic.second) > 0;
+        return true;
     }
     case OP_LT: {
         return operand_field->cmp(statistic.first) > 0;
@@ -249,8 +249,7 @@ bool Cond::eval(const std::pair<WrapperField*, 
WrapperField*>& statistic) const
                max_value_field->cmp(statistic.first) >= 0;
     }
     case OP_NOT_IN: {
-        return min_value_field->cmp(statistic.second) > 0 ||
-               max_value_field->cmp(statistic.first) < 0;
+        return true;
     }
     case OP_IS: {
         if (operand_field->is_null()) {
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index a40e5a9..a9945cc 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -691,15 +691,17 @@ COMPARISON_PREDICATE_CONDITION_VALUE(gt, GreaterPredicate)
 COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate)
 
 ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) {
-    // TODO: not equal and not in predicate is not pushed down
     int32_t index = _tablet->field_index(condition.column_name);
     if (index < 0) {
         return nullptr;
     }
+
     const TabletColumn& column = _tablet->tablet_schema().column(index);
     ColumnPredicate* predicate = nullptr;
-    if (condition.condition_op == "*=" && condition.condition_values.size() == 
1) {
-        predicate = _new_eq_pred(column, index, condition.condition_values[0]);
+    if ((condition.condition_op == "*=" || condition.condition_op == "!*=" || 
condition.condition_op == "=" || condition.condition_op == "!=")
+                && condition.condition_values.size() == 1) {
+        predicate = condition.condition_op == "*=" || condition.condition_op 
== "=" ? _new_eq_pred(column, index, condition.condition_values[0]) :
+                _new_ne_pred(column, index, condition.condition_values[0]);
     } else if (condition.condition_op == "<<") {
         predicate = _new_lt_pred(column, index, condition.condition_values[0]);
     } else if (condition.condition_op == "<=") {
@@ -708,7 +710,7 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
         predicate = _new_gt_pred(column, index, condition.condition_values[0]);
     } else if (condition.condition_op == ">=") {
         predicate = _new_ge_pred(column, index, condition.condition_values[0]);
-    } else if (condition.condition_op == "*=" && 
condition.condition_values.size() > 1) {
+    } else if ((condition.condition_op == "*=" || condition.condition_op == 
"!*=") && condition.condition_values.size() > 1) {
         switch (column.type()) {
         case OLAP_FIELD_TYPE_TINYINT: {
             std::set<int8_t> values;
@@ -718,7 +720,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 ss >> value;
                 values.insert(value);
             }
-            predicate = new InListPredicate<int8_t>(index, std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<int8_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<int8_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_SMALLINT: {
@@ -729,7 +735,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 ss >> value;
                 values.insert(value);
             }
-            predicate = new InListPredicate<int16_t>(index, std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<int16_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<int16_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_INT: {
@@ -740,7 +750,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 ss >> value;
                 values.insert(value);
             }
-            predicate = new InListPredicate<int32_t>(index, std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<int32_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<int32_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_BIGINT: {
@@ -751,7 +765,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 ss >> value;
                 values.insert(value);
             }
-            predicate = new InListPredicate<int64_t>(index, std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<int64_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<int64_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_LARGEINT: {
@@ -762,7 +780,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 ss >> value;
                 values.insert(value);
             }
-            predicate = new InListPredicate<int128_t>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<int128_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<int128_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_DECIMAL: {
@@ -772,7 +794,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 value.from_string(cond_val);
                 values.insert(value);
             }
-            predicate = new InListPredicate<decimal12_t>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<decimal12_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<decimal12_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_CHAR: {
@@ -787,7 +813,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 value.ptr = buffer;
                 values.insert(value);
             }
-            predicate = new InListPredicate<StringValue>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<StringValue>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<StringValue>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_VARCHAR: {
@@ -801,7 +831,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 value.ptr = buffer;
                 values.insert(value);
             }
-            predicate = new InListPredicate<StringValue>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<StringValue>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<StringValue>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_DATE: {
@@ -810,7 +844,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 uint24_t value = timestamp_from_date(cond_val);
                 values.insert(value);
             }
-            predicate = new InListPredicate<uint24_t>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<uint24_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<uint24_t>(index, 
std::move(values));
+            }
             break;
         }
         case OLAP_FIELD_TYPE_DATETIME: {
@@ -819,17 +857,20 @@ ColumnPredicate* Reader::_parse_to_predicate(const 
TCondition& condition) {
                 uint64_t value = timestamp_from_datetime(cond_val);
                 values.insert(value);
             }
-            predicate = new InListPredicate<uint64_t>(index, 
std::move(values));
+            if (condition.condition_op == "*=") {
+                predicate = new InListPredicate<uint64_t>(index, 
std::move(values));
+            } else {
+                predicate = new NotInListPredicate<uint64_t>(index, 
std::move(values));
+            }
             break;
         }
         // OLAP_FIELD_TYPE_BOOL is not valid in this case.
         default:
-            break;
+           break;
         }
     } else if (condition.condition_op == "is") {
         predicate = new NullPredicate(index, condition.condition_values[0] == 
"null");
     }
-
     return predicate;
 }
 
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index cfe213f..6452d6d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -155,6 +155,7 @@ bool ColumnReader::match_condition(CondColumn* cond) const {
     std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, 
_meta.length()));
     std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, 
_meta.length()));
     _parse_zone_map(_zone_map_index_meta->segment_zone_map(), min_value.get(), 
max_value.get());
+
     return _zone_map_match_condition(_zone_map_index_meta->segment_zone_map(), 
min_value.get(),
                                      max_value.get(), cond);
 }
diff --git a/be/test/exec/olap_common_test.cpp 
b/be/test/exec/olap_common_test.cpp
index afc1b50..c06d3e3 100644
--- a/be/test/exec/olap_common_test.cpp
+++ b/be/test/exec/olap_common_test.cpp
@@ -654,7 +654,7 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
 TEST_F(OlapScanKeysTest, ToOlapFilterTest) {
     ColumnValueRange<int32_t> range("col", TYPE_INT);
 
-    std::list<TCondition> filters;
+    std::vector<TCondition> filters;
     range.to_olap_filter(filters);
     ASSERT_TRUE(filters.empty());
 
@@ -720,6 +720,16 @@ TEST_F(OlapScanKeysTest, ToOlapFilterTest) {
     ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30");
     ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40");
     ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50");
+    
+    filters.clear();
+    range.to_in_condition(filters, false);
+    ASSERT_EQ(std::next(filters.begin(), 0)->column_name, "col");
+    ASSERT_EQ(std::next(filters.begin(), 0)->condition_op, "!*=");
+    ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20");
+    ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30");
+    ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40");
+    ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50");
+
 
     ASSERT_TRUE(range.add_range(FILTER_LARGER, 20).ok());
     filters.clear();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to