This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 01921dea248 [fix](inverted index) Incorrect handling of NULL literal
#37921 #37842 (#38741)
01921dea248 is described below
commit 01921dea248f86d8c979811e0dedf237b448aaa5
Author: zzzxl <[email protected]>
AuthorDate: Thu Aug 1 23:18:48 2024 +0800
[fix](inverted index) Incorrect handling of NULL literal #37921 #37842
(#38741)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 5 +
be/src/pipeline/exec/scan_operator.cpp | 19 ++-
.../test_index_inlist_fault_injection.out | 48 ++++++
.../test_index_rqg_bug4.out} | 31 ++--
.../test_index_inlist_fault_injection.groovy | 29 ++++
.../inverted_index_p0/test_index_rqg_bug2.groovy | 2 +-
.../inverted_index_p0/test_index_rqg_bug4.groovy | 162 +++++++++++++++++++++
7 files changed, 269 insertions(+), 27 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index acc46ce3204..09ad056189a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2849,6 +2849,11 @@ void
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
} else if (_is_literal_node(node_type)) {
auto v_literal_expr = static_cast<const
doris::vectorized::VLiteral*>(expr.get());
_column_predicate_info->query_values.insert(v_literal_expr->value());
+ } else if (node_type == TExprNodeType::NULL_LITERAL) {
+ if (!_column_predicate_info->column_name.empty()) {
+ auto v_literal_expr = static_cast<const
doris::vectorized::VLiteral*>(expr.get());
+
_column_predicate_info->query_values.insert(v_literal_expr->value());
+ }
} else {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index a72514fd0b1..c3fd0457d5e 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1100,18 +1100,12 @@ Status
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
auto hybrid_set = expr->get_set_func();
if (hybrid_set != nullptr) {
- if (hybrid_set->size() <=
- _parent->cast<typename
Derived::Parent>()._max_pushdown_conditions_per_column) {
- iter = hybrid_set->begin();
- } else {
- _filter_predicates.in_filters.emplace_back(slot->col_name(),
expr->get_set_func());
- *pdt = PushDownType::ACCEPTABLE;
- return Status::OK();
- }
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
} else {
- vectorized::VInPredicate* pred =
static_cast<vectorized::VInPredicate*>(expr);
+ auto* pred = static_cast<vectorized::VInPredicate*>(expr);
- vectorized::InState* state =
reinterpret_cast<vectorized::InState*>(
+ auto* state = reinterpret_cast<vectorized::InState*>(
expr_ctx->fn_context(pred->fn_context_index())
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
@@ -1120,6 +1114,11 @@ Status
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
}
iter = state->hybrid_set->begin();
+
+ if (state->hybrid_set->contain_null()) {
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
+ }
}
while (iter->has_next()) {
diff --git
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
index 9fbd1c8e252..528b4008084 100644
---
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -17,3 +17,51 @@
-- !sql --
208
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
diff --git
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
similarity index 57%
copy from
regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
copy to regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
index 9fbd1c8e252..784795cbbc1 100644
---
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -1,19 +1,18 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !sql --
-8
-
--- !sql --
-996
-
--- !sql --
-210
-
--- !sql --
-8
-
--- !sql --
-998
-
--- !sql --
-208
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
diff --git
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
index 52f9ed5c656..8d22c001ed0 100644
---
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -90,6 +90,35 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
} finally {
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
}
+
+ try {
+ sql """ set enable_common_expr_pushdown = true; """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip not in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip =
'2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip =
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+ sql """ set enable_common_expr_pushdown = false; """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip not in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip =
'2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip =
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+ sql """ set enable_common_expr_pushdown = true; """
+ } finally {
+ }
} finally {
}
}
\ No newline at end of file
diff --git
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
index bddbfdc4ab0..faede0bc3c1 100644
--- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-suite("test_index_rqg_bug2", "test_index_rqg_bug2"){
+suite("test_index_rqg_bug2", "test_index_rqg_bug"){
def table1 = "test_index_rqg_bug2"
sql "drop table if exists ${table1}"
diff --git
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+ def table1 = "test_index_rqg_bug4_table1"
+ def table2 = "test_index_rqg_bug4_table2"
+
+ sql "drop table if exists ${table1}"
+ sql "drop table if exists ${table2}"
+
+ sql """
+ create table ${table1} (
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY
RANGE(col_date_undef_signed_not_null_index_inverted) (
+ FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+ FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+ FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+ FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+ FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ create table ${table2} (
+ col_date_undef_signed_not_null date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY RANGE(col_date_undef_signed_not_null) (
+ PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+ PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+ PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+ PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+ PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+ PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+ PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+ PARTITION p100 VALUES LESS THAN ('9999-12-31')
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ insert into
${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ sql """
+ insert into
${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ try {
+ sql "sync"
+
+ qt_sql """
+ select
+ table1.col_varchar_1024__undef_signed_not_null as field1
+ from
+ ${table1} as table1
+ right join ${table2} as table2 on (
+ table2.col_date_undef_signed_index_inverted =
table1.col_date_undef_signed_index_inverted
+ )
+ where
+ not (
+ (
+ table2.`col_date_undef_signed_not_null_index_inverted` in (
+ '2027-01-16',
+ '2023-12-17',
+ '2024-02-18',
+ null,
+ '2000-10-18',
+ '2023-12-14',
+ '2023-12-18'
+ )
+ )
+ and table2.`col_date_undef_signed_not_null_index_inverted` <
'2025-06-18'
+ )
+ group by
+ field1
+ order by
+ field1
+ limit
+ 10000;
+ """
+ } finally {
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]