This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 373aa35c603 [fix](inverted index) in_list support inverted index
#37921 #37842 (#38738)
373aa35c603 is described below
commit 373aa35c603e9bec5fb88cc3df5c1c2fbdc28ac9
Author: zzzxl <[email protected]>
AuthorDate: Sat Aug 3 06:25:09 2024 +0800
[fix](inverted index) in_list support inverted index #37921 #37842 (#38738)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 19 ++-
be/src/vec/exec/scan/vscan_node.cpp | 20 +--
.../data/inverted_index_p0/test_index_rqg_bug4.out | 18 +++
.../inverted_index_p0/test_index_rqg_bug4.groovy | 162 +++++++++++++++++++++
4 files changed, 209 insertions(+), 10 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index bfca52cec00..2dcf3b26aeb 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -737,9 +737,16 @@ Status
SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
} else if (_is_literal_node(node_type)) {
auto v_literal_expr =
std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
_column_predicate_info->query_values.insert(v_literal_expr->value());
- } else if (node_type == TExprNodeType::BINARY_PRED || node_type ==
TExprNodeType::MATCH_PRED) {
+ } else if (node_type == TExprNodeType::BINARY_PRED || node_type ==
TExprNodeType::MATCH_PRED ||
+ node_type == TExprNodeType::IN_PRED) {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
+ } else if (node_type == TExprNodeType::IN_PRED) {
+ if (expr->op() == TExprOpcode::type::FILTER_IN) {
+ _column_predicate_info->query_op = "in";
+ } else {
+ _column_predicate_info->query_op = "not_in";
+ }
} else {
_column_predicate_info->query_op = expr->fn().name.function_name;
}
@@ -872,6 +879,10 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
pred_type == PredicateType::LT || pred_type ==
PredicateType::LE ||
pred_type == PredicateType::GT || pred_type ==
PredicateType::GE ||
pred_type == PredicateType::MATCH;
+ if
(_opts.runtime_state->query_options().enable_inverted_index_compound_inlist) {
+ is_support |= (pred_type == PredicateType::IN_LIST ||
+ pred_type == PredicateType::NOT_IN_LIST);
+ }
if (!is_support) {
_need_read_data_indices[column_id] = true;
continue;
@@ -2527,6 +2538,12 @@ void
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
} else {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
+ } else if (node_type == TExprNodeType::IN_PRED) {
+ if (expr->op() == TExprOpcode::type::FILTER_IN) {
+ _column_predicate_info->query_op = "in";
+ } else {
+ _column_predicate_info->query_op = "not_in";
+ }
} else if (node_type != TExprNodeType::COMPOUND_PRED) {
_column_predicate_info->query_op = expr->fn().name.function_name;
}
diff --git a/be/src/vec/exec/scan/vscan_node.cpp
b/be/src/vec/exec/scan/vscan_node.cpp
index 3b2711a4541..fe6195d2e3c 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -1133,21 +1133,23 @@ Status
VScanNode::_normalize_in_and_not_in_compound_predicate(vectorized::VExpr*
std::string fn_name =
expr->op() == TExprOpcode::type::FILTER_IN ? "in_list" :
"not_in_list";
+ for (const auto& child_expr : expr->children()) {
+ if (child_expr->node_type() == TExprNodeType::NULL_LITERAL) {
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
+ }
+ }
+
HybridSetBase::IteratorBase* iter = nullptr;
auto hybrid_set = expr->get_set_func();
if (hybrid_set != nullptr) {
- if (hybrid_set->size() <= _max_pushdown_conditions_per_column) {
- iter = hybrid_set->begin();
- } else {
- _filter_predicates.in_filters.emplace_back(slot->col_name(),
expr->get_set_func());
- *pdt = PushDownType::ACCEPTABLE;
- return Status::OK();
- }
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
} else {
- VInPredicate* pred = static_cast<VInPredicate*>(expr);
+ auto* pred = static_cast<vectorized::VInPredicate*>(expr);
- InState* state = reinterpret_cast<InState*>(
+ auto* state = reinterpret_cast<vectorized::InState*>(
expr_ctx->fn_context(pred->fn_context_index())
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
diff --git a/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
new file mode 100644
index 00000000000..784795cbbc1
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
+
diff --git
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+ def table1 = "test_index_rqg_bug4_table1"
+ def table2 = "test_index_rqg_bug4_table2"
+
+ sql "drop table if exists ${table1}"
+ sql "drop table if exists ${table2}"
+
+ sql """
+ create table ${table1} (
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY
RANGE(col_date_undef_signed_not_null_index_inverted) (
+ FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+ FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+ FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+ FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+ FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ create table ${table2} (
+ col_date_undef_signed_not_null date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY RANGE(col_date_undef_signed_not_null) (
+ PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+ PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+ PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+ PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+ PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+ PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+ PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+ PARTITION p100 VALUES LESS THAN ('9999-12-31')
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ insert into
${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ sql """
+ insert into
${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ try {
+ sql "sync"
+
+ qt_sql """
+ select
+ table1.col_varchar_1024__undef_signed_not_null as field1
+ from
+ ${table1} as table1
+ right join ${table2} as table2 on (
+ table2.col_date_undef_signed_index_inverted =
table1.col_date_undef_signed_index_inverted
+ )
+ where
+ not (
+ (
+ table2.`col_date_undef_signed_not_null_index_inverted` in (
+ '2027-01-16',
+ '2023-12-17',
+ '2024-02-18',
+ null,
+ '2000-10-18',
+ '2023-12-14',
+ '2023-12-18'
+ )
+ )
+ and table2.`col_date_undef_signed_not_null_index_inverted` <
'2025-06-18'
+ )
+ group by
+ field1
+ order by
+ field1
+ limit
+ 10000;
+ """
+ } finally {
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]