This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new a5298d6 [Performance Improve] Push Down _conjunctf of 'not in' and
'!=' to Storage Engine. (#5207)
a5298d6 is described below
commit a5298d617daac8e4bf3ae473d8bb195f67ad15dd
Author: HappenLee <[email protected]>
AuthorDate: Sat Jan 23 21:07:01 2021 +0800
[Performance Improve] Push Down _conjunctf of 'not in' and '!=' to Storage
Engine. (#5207)
---
be/src/exec/olap_common.h | 65 +++++++++---
be/src/exec/olap_scan_node.cpp | 135 +++++++++++++++++++-----
be/src/exec/olap_scan_node.h | 8 +-
be/src/olap/olap_cond.cpp | 5 +-
be/src/olap/reader.cpp | 73 ++++++++++---
be/src/olap/rowset/segment_v2/column_reader.cpp | 1 +
be/test/exec/olap_common_test.cpp | 12 ++-
7 files changed, 234 insertions(+), 65 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index b28ddc5..ca71dba 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -62,7 +62,10 @@ public:
ColumnValueRange(std::string col_name, PrimitiveType type, const T& min,
const T& max, bool contain_null);
// should add fixed value before add range
- Status add_fixed_value(T value);
+ Status add_fixed_value(const T& value);
+
+ // removes a fixed value; should only be called after fixed values have been added
+ void remove_fixed_value(const T& value);
Status add_range(SQLFilterOp op, T value);
@@ -109,24 +112,16 @@ public:
PrimitiveType type() const { return _column_type; }
+ const std::string& column_name() const { return _column_name; }
+
bool contain_null() const { return _contain_null; }
size_t get_fixed_value_size() const { return _fixed_values.size(); }
- void to_olap_filter(std::list<TCondition>& filters) {
+ void to_olap_filter(std::vector<TCondition>& filters) {
if (is_fixed_value_range()) {
// 1. convert to in filter condition
- TCondition condition;
- condition.__set_column_name(_column_name);
- condition.__set_condition_op("*=");
-
- for (const auto& value : _fixed_values) {
- condition.condition_values.push_back(cast_to_string(value));
- }
-
- if (condition.condition_values.size() != 0) {
- filters.push_back(condition);
- }
+ to_in_condition(filters, true);
} else if (_low_value < _high_value) {
// 2. convert to min max filter condition
TCondition null_pred;
@@ -178,6 +173,20 @@ public:
}
}
+ void to_in_condition(std::vector<TCondition>& filters, bool is_in = true) {
+ TCondition condition;
+ condition.__set_column_name(_column_name);
+ condition.__set_condition_op(is_in ? "*=" : "!*=");
+
+ for (const auto& value : _fixed_values) {
+ condition.condition_values.push_back(cast_to_string(value));
+ }
+
+ if (condition.condition_values.size() != 0) {
+ filters.push_back(condition);
+ }
+ }
+
void set_whole_value_range() {
_fixed_values.clear();
_low_value = TYPE_MIN;
@@ -207,8 +216,20 @@ public:
_contain_null = contain_null;
};
+ static void add_fixed_value_range(ColumnValueRange<T>& range, T* value) {
+ range.add_fixed_value(*value);
+ }
+
+ static void remove_fixed_value_range(ColumnValueRange<T>& range, T* value)
{
+ range.remove_fixed_value(*value);
+ }
+
static ColumnValueRange<T> create_empty_column_value_range(PrimitiveType
type) {
- return ColumnValueRange<T>("", type, TYPE_MAX, TYPE_MIN, false);
+ return ColumnValueRange<T>::create_empty_column_value_range("", type);
+ }
+
+ static ColumnValueRange<T> create_empty_column_value_range(const
std::string& col_name, PrimitiveType type) {
+ return ColumnValueRange<T>(col_name, type, TYPE_MAX, TYPE_MIN, false);
}
protected:
@@ -323,17 +344,26 @@ ColumnValueRange<T>::ColumnValueRange(std::string
col_name, PrimitiveType type,
_contain_null(contain_null){}
template <class T>
-Status ColumnValueRange<T>::add_fixed_value(T value) {
+Status ColumnValueRange<T>::add_fixed_value(const T& value) {
if (INVALID_TYPE == _column_type) {
return Status::InternalError("AddFixedValue failed, Invalid type");
}
_fixed_values.insert(value);
_contain_null = false;
+
+ _high_value = TYPE_MIN;
+ _low_value = TYPE_MAX;
+
return Status::OK();
}
template <class T>
+void ColumnValueRange<T>::remove_fixed_value(const T& value) {
+ _fixed_values.erase(value);
+}
+
+template <class T>
bool ColumnValueRange<T>::is_fixed_value_range() const {
return _fixed_values.size() != 0;
}
@@ -480,7 +510,6 @@ Status ColumnValueRange<T>::add_range(SQLFilterOp op, T
value) {
_low_value = TYPE_MAX;
} else {
if (_high_value > _low_value) {
-
switch (op) {
case FILTER_LARGER: {
if (value >= _low_value) {
@@ -593,7 +622,7 @@ void ColumnValueRange<T>::intersection(ColumnValueRange<T>&
range) {
}
std::set<T> result_values;
- // 3. fixed_value intersection, fixex value range do not contain null
+ // 3. fixed_value intersection, fixed value range do not contain null
if (is_fixed_value_range() || range.is_fixed_value_range()) {
if (is_fixed_value_range() && range.is_fixed_value_range()) {
set_intersection(_fixed_values.begin(), _fixed_values.end(),
range._fixed_values.begin(),
@@ -621,6 +650,8 @@ void ColumnValueRange<T>::intersection(ColumnValueRange<T>&
range) {
if (!result_values.empty()) {
_fixed_values = std::move(result_values);
_contain_null = false;
+ _high_value = TYPE_MIN;
+ _low_value = TYPE_MAX;
} else {
set_empty_value_range();
}
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index cde915e..1f1de3f 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -536,14 +536,12 @@ Status OlapScanNode::normalize_conjuncts() {
}
Status OlapScanNode::build_olap_filters() {
- _olap_filter.clear();
-
for (auto& iter : _column_value_ranges) {
ToOlapFilterVisitor visitor;
- boost::variant<std::list<TCondition>> filters;
+ boost::variant<std::vector<TCondition>> filters;
boost::apply_visitor(visitor, iter.second, filters);
- std::list<TCondition> new_filters =
boost::get<std::list<TCondition>>(filters);
+ std::vector<TCondition> new_filters =
boost::get<std::vector<TCondition>>(filters);
if (new_filters.empty()) {
continue;
}
@@ -734,13 +732,16 @@ Status
OlapScanNode::normalize_predicate(ColumnValueRange<T>& range, SlotDescrip
// 1. Normalize InPredicate, add to ColumnValueRange
RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range));
- // 2. Normalize BinaryPredicate , add to ColumnValueRange
+ // 2. Normalize NotInPredicate, add to ColumnValueRange
+ RETURN_IF_ERROR(normalize_not_in_and_not_eq_predicate(slot, &range));
+
+ // 3. Normalize BinaryPredicate , add to ColumnValueRange
RETURN_IF_ERROR(normalize_noneq_binary_predicate(slot, &range));
- // 3. Check whether range is empty, set _eos
+ // 4. Check whether range is empty, set _eos
if (range.is_empty_value_range()) _eos = true;
- // 4. Add range to Column->ColumnValueRange map
+ // 5. Add range to Column->ColumnValueRange map
_column_value_ranges[slot->col_name()] = range;
return Status::OK();
@@ -758,11 +759,6 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) {
bool OlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor *slot,
doris::InPredicate* pred) {
- if (pred->is_not_in()) {
- // can not push down NOT IN predicate to storage engine
- return false;
- }
-
if (Expr::type_without_cast(pred->get_child(0)) !=
TExprNodeType::SLOT_REF) {
// not a slot ref(column)
return false;
@@ -843,21 +839,17 @@ std::pair<bool, void*>
OlapScanNode::should_push_down_eq_predicate(doris::SlotDe
return result_pair;
}
-template <typename T>
-Status OlapScanNode::insert_value_to_range(doris::ColumnValueRange<T>&
temp_range, doris::PrimitiveType type, void *value) {
+template <typename T, typename ChangeFixedValueRangeFunc>
+Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
PrimitiveType type, void *value,
+ const ChangeFixedValueRangeFunc&
func) {
switch (type) {
- case TYPE_TINYINT: {
- int32_t v = *reinterpret_cast<int8_t*>(value);
- temp_range.add_fixed_value(*reinterpret_cast<T*>(&v));
- break;
- }
case TYPE_DATE: {
DateTimeValue date_value =
*reinterpret_cast<DateTimeValue*>(value);
// There is must return empty data in olap_scan_node,
// Because data value loss accuracy
if (!date_value.check_loss_accuracy_cast_to_date()) {
- temp_range.add_fixed_value(*reinterpret_cast<T*>(&date_value));
+ func(temp_range, reinterpret_cast<T*>(&date_value));
}
break;
}
@@ -867,16 +859,17 @@ Status
OlapScanNode::insert_value_to_range(doris::ColumnValueRange<T>& temp_rang
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_DATETIME:
+ case TYPE_TINYINT:
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_BIGINT:
case TYPE_LARGEINT: {
- temp_range.add_fixed_value(*reinterpret_cast<T*>(value));
+ func(temp_range, reinterpret_cast<T*>(value));
break;
}
case TYPE_BOOLEAN: {
bool v = *reinterpret_cast<bool*>(value);
- temp_range.add_fixed_value(*reinterpret_cast<T*>(&v));
+ func(temp_range, reinterpret_cast<T*>(&v));
break;
}
default: {
@@ -916,7 +909,8 @@ Status
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
continue;
}
auto value = const_cast<void*>(iter->get_value());
- RETURN_IF_ERROR(insert_value_to_range(temp_range,
slot->type().type, value));
+ RETURN_IF_ERROR(change_fixed_value_range(temp_range,
slot->type().type, value,
+ ColumnValueRange<T>::add_fixed_value_range));
iter->next();
}
@@ -925,8 +919,7 @@ Status
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
}
range->intersection(temp_range);
} // end of handle in predicate
-
- // 2. Normalize eq conjuncts like 'where col = value'
+ // 2. Normalize eq conjuncts like 'where col = value'
else if (TExprNodeType::BINARY_PRED ==
_conjunct_ctxs[conj_idx]->root()->node_type() &&
FILTER_IN ==
to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) {
Expr* pred = _conjunct_ctxs[conj_idx]->root();
@@ -944,7 +937,8 @@ Status
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
auto value = result_pair.second;
// where A = NULL should return empty result set
if (value != nullptr) {
- RETURN_IF_ERROR(insert_value_to_range(temp_range,
slot->type().type, value));
+ RETURN_IF_ERROR(change_fixed_value_range(temp_range,
slot->type().type, value,
+ ColumnValueRange<T>::add_fixed_value_range));
}
if (is_key_column(slot->col_name())) {
@@ -965,6 +959,95 @@ Status
OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
return Status::OK();
}
+// Construct the ColumnValueRange for one specified column
+// It will only handle the NotInPredicate and not eq BinaryPredicate in
_conjunct_ctxs.
+// It will try to push down conditions of that column as much as possible,
+// But if the number of conditions exceeds the limit, none of conditions will
be pushed down.
+template <class T>
+Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor*
slot,
+ ColumnValueRange<T>* range)
{
+ // If the range of the slot is already a fixed value range, remove the excluded values from the fixed value set
of the column value range;
+ // otherwise add the values to not_in_range and push the predicate down directly
+ bool is_fixed_range = range->is_fixed_value_range();
+ auto not_in_range =
ColumnValueRange<T>::create_empty_column_value_range(range->column_name(),
range->type());
+
+ std::vector<uint32_t> filter_conjuncts_index;
+ for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
+ // 1. Normalize not-in conjuncts like 'where col not in (v1, v2, v3)'
+ if (TExprOpcode::FILTER_NOT_IN ==
_conjunct_ctxs[conj_idx]->root()->op()) {
+ InPredicate* pred =
dynamic_cast<InPredicate*>(_conjunct_ctxs[conj_idx]->root());
+ if (!should_push_down_in_predicate(slot, pred)) {
+ continue;
+ }
+
+ // begin to push InPredicate value into ColumnValueRange
+ auto iter = pred->hybrid_set()->begin();
+ while (iter->has_next()) {
+ // column not in (NULL) is always true
+ if (NULL == iter->get_value()) {
+ continue;
+ }
+ auto value = const_cast<void*>(iter->get_value());
+ if (is_fixed_range) {
+ RETURN_IF_ERROR(change_fixed_value_range(*range,
slot->type().type, value,
+ ColumnValueRange<T>::remove_fixed_value_range));
+ } else {
+ RETURN_IF_ERROR(change_fixed_value_range(not_in_range,
slot->type().type, value,
+
ColumnValueRange<T>::add_fixed_value_range));
+ }
+ iter->next();
+ }
+
+ // only where a in ('a', 'b', NULL) contain NULL will
+ // clear temp_range to whole range, no need do intersection
+ if (is_key_column(slot->col_name())) {
+ filter_conjuncts_index.emplace_back(conj_idx);
+ }
+ } // end of handle not in predicate
+
+ // 2. Normalize not-eq conjuncts like 'where col != value'
+ if (TExprNodeType::BINARY_PRED ==
_conjunct_ctxs[conj_idx]->root()->node_type() &&
+ FILTER_NOT_IN ==
to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) {
+ Expr* pred = _conjunct_ctxs[conj_idx]->root();
+ DCHECK(pred->get_num_children() == 2);
+
+ for (int child_idx = 0; child_idx < 2; ++child_idx) {
+ // TODO: should use C++17 structured bindings to refactor
this code in the future:
+ // 'auto [should_push_down, value] =
should_push_down_eq_predicate(slot, pred, conj_idx, child_idx);'
+ // to make the code tidier and more readable
+ auto result_pair = should_push_down_eq_predicate(slot, pred,
conj_idx, child_idx);
+ if (!result_pair.first) {
+ continue;
+ }
+ auto value = result_pair.second;
+
+ if (is_fixed_range) {
+ RETURN_IF_ERROR(change_fixed_value_range(*range,
slot->type().type, value,
+
ColumnValueRange<T>::remove_fixed_value_range));
+ } else {
+ RETURN_IF_ERROR(change_fixed_value_range(not_in_range,
slot->type().type, value,
+
ColumnValueRange<T>::add_fixed_value_range));
+ }
+
+ if (is_key_column(slot->col_name())) {
+ filter_conjuncts_index.emplace_back(conj_idx);
+ }
+ } // end for each binary predicate child
+ } // end of handling eq binary predicate
+ }
+
+ // If the limit is exceeded, no conditions will be pushed down to the storage engine.
+ if (is_fixed_range || not_in_range.get_fixed_value_size() <=
_max_pushdown_conditions_per_column) {
+ if (!is_fixed_range) {
+ // push down not in condition to storage engine
+ not_in_range.to_in_condition(_olap_filter, false);
+ }
+ std::copy(filter_conjuncts_index.cbegin(),
filter_conjuncts_index.cend(),
+ std::inserter(_pushed_conjuncts_index,
_pushed_conjuncts_index.begin()));
+ }
+ return Status::OK();
+}
+
template <typename T>
bool OlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor*
slot,
const std::string& is_null_str, ColumnValueRange<T>* range) {
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 728e088..34d80fd 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -154,6 +154,9 @@ protected:
Status normalize_in_and_eq_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range);
template <class T>
+ Status normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range);
+
+ template <class T>
Status normalize_noneq_binary_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range);
template <typename T>
@@ -178,8 +181,9 @@ private:
std::pair<bool, void*> should_push_down_eq_predicate(SlotDescriptor* slot,
Expr* pred, int conj_idx, int child_idx);
- template <typename T>
- static Status insert_value_to_range(ColumnValueRange<T>& range,
PrimitiveType type, void* value);
+ template <typename T, typename ChangeFixedValueRangeFunc>
+ static Status change_fixed_value_range(ColumnValueRange <T> &range,
PrimitiveType type, void *value,
+ const
ChangeFixedValueRangeFunc& func);
friend class OlapScanner;
diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp
index 231da7b..c9184c3 100644
--- a/be/src/olap/olap_cond.cpp
+++ b/be/src/olap/olap_cond.cpp
@@ -230,7 +230,7 @@ bool Cond::eval(const std::pair<WrapperField*,
WrapperField*>& statistic) const
operand_field->cmp(statistic.second) <= 0;
}
case OP_NE: {
- return operand_field->cmp(statistic.first) < 0 ||
operand_field->cmp(statistic.second) > 0;
+ return true;
}
case OP_LT: {
return operand_field->cmp(statistic.first) > 0;
@@ -249,8 +249,7 @@ bool Cond::eval(const std::pair<WrapperField*,
WrapperField*>& statistic) const
max_value_field->cmp(statistic.first) >= 0;
}
case OP_NOT_IN: {
- return min_value_field->cmp(statistic.second) > 0 ||
- max_value_field->cmp(statistic.first) < 0;
+ return true;
}
case OP_IS: {
if (operand_field->is_null()) {
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index a40e5a9..a9945cc 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -691,15 +691,17 @@ COMPARISON_PREDICATE_CONDITION_VALUE(gt, GreaterPredicate)
COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate)
ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) {
- // TODO: not equal and not in predicate is not pushed down
int32_t index = _tablet->field_index(condition.column_name);
if (index < 0) {
return nullptr;
}
+
const TabletColumn& column = _tablet->tablet_schema().column(index);
ColumnPredicate* predicate = nullptr;
- if (condition.condition_op == "*=" && condition.condition_values.size() ==
1) {
- predicate = _new_eq_pred(column, index, condition.condition_values[0]);
+ if ((condition.condition_op == "*=" || condition.condition_op == "!*=" ||
condition.condition_op == "=" || condition.condition_op == "!=")
+ && condition.condition_values.size() == 1) {
+ predicate = condition.condition_op == "*=" || condition.condition_op
== "=" ? _new_eq_pred(column, index, condition.condition_values[0]) :
+ _new_ne_pred(column, index, condition.condition_values[0]);
} else if (condition.condition_op == "<<") {
predicate = _new_lt_pred(column, index, condition.condition_values[0]);
} else if (condition.condition_op == "<=") {
@@ -708,7 +710,7 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
predicate = _new_gt_pred(column, index, condition.condition_values[0]);
} else if (condition.condition_op == ">=") {
predicate = _new_ge_pred(column, index, condition.condition_values[0]);
- } else if (condition.condition_op == "*=" &&
condition.condition_values.size() > 1) {
+ } else if ((condition.condition_op == "*=" || condition.condition_op ==
"!*=") && condition.condition_values.size() > 1) {
switch (column.type()) {
case OLAP_FIELD_TYPE_TINYINT: {
std::set<int8_t> values;
@@ -718,7 +720,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
ss >> value;
values.insert(value);
}
- predicate = new InListPredicate<int8_t>(index, std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<int8_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<int8_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_SMALLINT: {
@@ -729,7 +735,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
ss >> value;
values.insert(value);
}
- predicate = new InListPredicate<int16_t>(index, std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<int16_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<int16_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_INT: {
@@ -740,7 +750,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
ss >> value;
values.insert(value);
}
- predicate = new InListPredicate<int32_t>(index, std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<int32_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<int32_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_BIGINT: {
@@ -751,7 +765,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
ss >> value;
values.insert(value);
}
- predicate = new InListPredicate<int64_t>(index, std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<int64_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<int64_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_LARGEINT: {
@@ -762,7 +780,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
ss >> value;
values.insert(value);
}
- predicate = new InListPredicate<int128_t>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<int128_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<int128_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_DECIMAL: {
@@ -772,7 +794,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
value.from_string(cond_val);
values.insert(value);
}
- predicate = new InListPredicate<decimal12_t>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<decimal12_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<decimal12_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_CHAR: {
@@ -787,7 +813,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
value.ptr = buffer;
values.insert(value);
}
- predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<StringValue>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_VARCHAR: {
@@ -801,7 +831,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
value.ptr = buffer;
values.insert(value);
}
- predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<StringValue>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<StringValue>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_DATE: {
@@ -810,7 +844,11 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
uint24_t value = timestamp_from_date(cond_val);
values.insert(value);
}
- predicate = new InListPredicate<uint24_t>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<uint24_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<uint24_t>(index,
std::move(values));
+ }
break;
}
case OLAP_FIELD_TYPE_DATETIME: {
@@ -819,17 +857,20 @@ ColumnPredicate* Reader::_parse_to_predicate(const
TCondition& condition) {
uint64_t value = timestamp_from_datetime(cond_val);
values.insert(value);
}
- predicate = new InListPredicate<uint64_t>(index,
std::move(values));
+ if (condition.condition_op == "*=") {
+ predicate = new InListPredicate<uint64_t>(index,
std::move(values));
+ } else {
+ predicate = new NotInListPredicate<uint64_t>(index,
std::move(values));
+ }
break;
}
// OLAP_FIELD_TYPE_BOOL is not valid in this case.
default:
- break;
+ break;
}
} else if (condition.condition_op == "is") {
predicate = new NullPredicate(index, condition.condition_values[0] ==
"null");
}
-
return predicate;
}
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index cfe213f..6452d6d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -155,6 +155,7 @@ bool ColumnReader::match_condition(CondColumn* cond) const {
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type,
_meta.length()));
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type,
_meta.length()));
_parse_zone_map(_zone_map_index_meta->segment_zone_map(), min_value.get(),
max_value.get());
+
return _zone_map_match_condition(_zone_map_index_meta->segment_zone_map(),
min_value.get(),
max_value.get(), cond);
}
diff --git a/be/test/exec/olap_common_test.cpp
b/be/test/exec/olap_common_test.cpp
index afc1b50..c06d3e3 100644
--- a/be/test/exec/olap_common_test.cpp
+++ b/be/test/exec/olap_common_test.cpp
@@ -654,7 +654,7 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
TEST_F(OlapScanKeysTest, ToOlapFilterTest) {
ColumnValueRange<int32_t> range("col", TYPE_INT);
- std::list<TCondition> filters;
+ std::vector<TCondition> filters;
range.to_olap_filter(filters);
ASSERT_TRUE(filters.empty());
@@ -720,6 +720,16 @@ TEST_F(OlapScanKeysTest, ToOlapFilterTest) {
ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30");
ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40");
ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50");
+
+ filters.clear();
+ range.to_in_condition(filters, false);
+ ASSERT_EQ(std::next(filters.begin(), 0)->column_name, "col");
+ ASSERT_EQ(std::next(filters.begin(), 0)->condition_op, "!*=");
+ ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20");
+ ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30");
+ ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40");
+ ASSERT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50");
+
ASSERT_TRUE(range.add_range(FILTER_LARGER, 20).ok());
filters.clear();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]